mdbx: merge branch master into stable.

Ветка 0.12 считается готовой к продуктовому использованию,
получает статус стабильной и далее будет получать только исправление ошибок.

Разработка будет продолжена в ветке 0.13, а ветка 0.11 становится архивной.
This commit is contained in:
Леонид Юрьев (Leonid Yuriev) 2023-03-04 00:00:24 +03:00
commit c81b007587
69 changed files with 15576 additions and 10571 deletions

View File

@ -1,5 +1,5 @@
##
## Copyright 2020-2022 Leonid Yuriev <leo@yuriev.ru>
## Copyright 2020-2023 Leonid Yuriev <leo@yuriev.ru>
## and other libmdbx authors: please see AUTHORS file.
## All rights reserved.
##
@ -235,6 +235,7 @@ if(SUBPROJECT)
if(NOT DEFINED CMAKE_POSITION_INDEPENDENT_CODE)
option(CMAKE_POSITION_INDEPENDENT_CODE "Generate position independent (PIC)" ON)
endif()
set(MDBX_MANAGE_BUILD_FLAGS_DEFAULT OFF)
else()
option(BUILD_SHARED_LIBS "Build shared libraries (DLLs)" ON)
option(CMAKE_POSITION_INDEPENDENT_CODE "Generate position independent (PIC)" ON)
@ -351,9 +352,14 @@ else()
endif()
endif(NOT MDBX_AMALGAMATED_SOURCE)
setup_compile_flags()
set(MDBX_MANAGE_BUILD_FLAGS_DEFAULT ON)
endif(SUBPROJECT)
option(MDBX_MANAGE_BUILD_FLAGS "Allow libmdbx to configure/manage/override its own build flags" ${MDBX_MANAGE_BUILD_FLAGS_DEFAULT})
if(MDBX_MANAGE_BUILD_FLAGS)
setup_compile_flags()
endif()
list(FIND CMAKE_C_COMPILE_FEATURES c_std_11 HAS_C11)
list(FIND CMAKE_CXX_COMPILE_FEATURES cxx_std_11 HAS_CXX11)
list(FIND CMAKE_CXX_COMPILE_FEATURES cxx_std_14 HAS_CXX14)
@ -500,16 +506,29 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
if(MDBX_NTDLL_EXTRA_IMPLIB)
add_mdbx_option(MDBX_WITHOUT_MSVC_CRT "Avoid dependence from MSVC CRT and use ntdll.dll instead" OFF)
endif()
set(MDBX_AVOID_MSYNC_DEFAULT ON)
else()
add_mdbx_option(MDBX_USE_OFDLOCKS "Use Open file description locks (aka OFD locks, non-POSIX)" AUTO)
mark_as_advanced(MDBX_USE_OFDLOCKS)
set(MDBX_AVOID_MSYNC_DEFAULT OFF)
endif()
add_mdbx_option(MDBX_LOCKING "Locking method (Win32=-1, SysV=5, POSIX=1988, POSIX=2001, POSIX=2008, Futexes=1995)" AUTO)
option(MDBX_AVOID_MSYNC "Controls dirty pages tracking, spilling and persisting in MDBX_WRITEMAP mode" ${MDBX_AVOID_MSYNC_DEFAULT})
add_mdbx_option(MDBX_LOCKING "Locking method (Windows=-1, SysV=5, POSIX=1988, POSIX=2001, POSIX=2008, Futexes=1995)" AUTO)
mark_as_advanced(MDBX_LOCKING)
add_mdbx_option(MDBX_TRUST_RTC "Does a system have battery-backed Real-Time Clock or just a fake" AUTO)
mark_as_advanced(MDBX_TRUST_RTC)
option(MDBX_FORCE_ASSERTIONS "Force enable assertion checking" OFF)
option(MDBX_DISABLE_PAGECHECKS "Disable some checks to reduce an overhead and detection probability of database corruption to a values closer to the LMDB" OFF)
option(MDBX_DISABLE_VALIDATION "Disable some checks to reduce an overhead and detection probability of database corruption to a values closer to the LMDB" OFF)
option(MDBX_ENABLE_REFUND "Zerocost auto-compactification during write-transactions" ON)
option(MDBX_ENABLE_MADVISE "Using POSIX' madvise() and/or similar hints" ON)
if (CMAKE_TARGET_BITNESS GREATER 32)
set(MDBX_BIGFOOT_DEFAULT ON)
else()
set(MDBX_BIGFOOT_DEFAULT OFF)
endif()
option(MDBX_ENABLE_BIGFOOT "Chunking long list of retired pages during huge transactions commit to avoid use sequences of pages" ${MDBX_BIGFOOT_DEFAULT})
option(MDBX_ENABLE_PGOP_STAT "Gathering statistics for page operations" ON)
option(MDBX_ENABLE_PROFGC "Profiling of GC search and updates" OFF)
if(NOT MDBX_AMALGAMATED_SOURCE)
if(CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE_UPPERCASE STREQUAL "DEBUG")
@ -966,6 +985,7 @@ if (NOT SUBPROJECT)
set(CPACK_PACKAGE_VERSION_COMMIT ${MDBX_VERSION_REVISION})
set(PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.${CPACK_PACKAGE_VERSION_COMMIT}")
message(STATUS "libmdbx package version is ${PACKAGE_VERSION}")
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/VERSION.txt" "${MDBX_VERSION_MAJOR}.${MDBX_VERSION_MINOR}.${MDBX_VERSION_RELEASE}.${MDBX_VERSION_REVISION}")
endif()
cmake_policy(POP)

View File

@ -1,4 +1,4 @@
Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>.
Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>.
Copyright 2011-2015 Howard Chu, Symas Corp.
Copyright 2015,2016 Peter-Service R&D LLC.
All rights reserved.

View File

@ -1,53 +1,447 @@
ChangeLog
---------
## v0.11.14 (Sergey Kapitsa) at 2023-02-14
English version [by Google](https://gitflic-ru.translate.goog/project/erthink/libmdbx/blob?file=ChangeLog.md&_x_tr_sl=ru&_x_tr_tl=en)
and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md).
The stable bugfix release in memory of [Sergey Kapitsa](https://en.wikipedia.org/wiki/Sergey_Kapitsa) on his 95th birthday.
## v0.12.4 (Арта-333) от 2023-03-03
Стабилизирующий выпуск с исправлением обнаруженных ошибок, устранением
недочетов и технических долгов. Ветка 0.12 считается готовой к
продуктовому использованию, получает статус стабильной и далее будет
получать только исправление ошибок. Разработка будет продолжена в ветке
0.13, а ветка 0.11 становится архивной.
```
22 files changed, 250 insertions(+), 174 deletions(-)
63 files changed, 1161 insertions(+), 569 deletions(-)
Signed-off-by: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
```
Благодарности:
- Max <maxc0d3r@protonmail.com> за сообщение о проблеме ERROR_SHARING_VIOLATION
в режиме MDBX_EXCLUSIVE на Windows.
- Alisher Ashyrov <https://t.me/a1is43ras4> за сообщение о проблеме
с assert-проверкой и содействие в отладке.
- Masatoshi Fukunaga <https://gitflic.ru/user/mah0x211> за сообщение о проблеме
`put(MDBX_UPSERT+MDBX_ALLDUPS)` для случая замены всех значений в subDb.
Исправления (без корректировок новых функций):
- Устранен регресс после коммита 474391c83c5f81def6fdf3b0b6f5716a87b78fbf,
приводящий к возврату ERROR_SHARING_VIOLATION в Windows при открытии БД
в режиме MDBX_EXCLUSIVE для чтения-записи.
- Добавлено ограничение размера отображения при коротком read-only файле, для
предотвращения ошибки ERROR_NOT_ENOUGH_MEMORY в Windows, которая возникает
в этом случае и совсем не информативна для пользователя.
- Произведен рефакторинг `dxb_resize()`, в том числе, для устранения срабатывания
assert-проверки `size_bytes == env->me_dxb_mmap.current` в специфических
многопоточных сценариях использования. Проверка срабатывала только в
отладочных сборках, при специфическом наложении во времени читающей и
пишущей транзакции в разных потоках, одновременно с изменением размера БД.
Кроме срабатывания проверки, каких-либо других последствий не возникало.
- Устранена проблема в `put(MDBX_UPSERT+MDBX_ALLDUPS)` для случая замены
всех значений единственного ключа в subDb. В ходе этой операции subDb
становится полностью пустой, без каких-либо страниц и именно эта
ситуация не была учтена в коде, что приводило к повреждению БД
при фиксации такой транзакции.
- Устранена излишняя assert-проверка внутри `override_meta()`.
Что в отладочных сборках могло приводить к ложным срабатываниям
при восстановлении БД, в том числе при автоматическом откате слабых
мета-страниц.
- Скорректированы макросы `__cold`/`__hot`, в том числе для устранения проблемы
`error: inlining failed in call to always_inline FOO(...): target specific option mismatch`
при сборке посредством GCC >10.x для SH4.
Ликвидация технических долгов и мелочи:
- Исправлены многочисленные опечатки в документации.
- Доработан тест для полной стохастической проверки `MDBX_EKEYMISMATCH` в режиме `MDBX_APPEND`.
- Расширены сценарии запуска `mdbx_chk` из CMake-тестов для проверки как в обычном,
так и эксклюзивном режимах чтения-записи.
- Уточнены спецификаторы `const` и `noexcept` для нескольких методов в C++ API.
- Устранено использование стека под буферы для `wchar`-преобразования путей.
- Для Windows добавлена функция `mdbx_env_get_path()` для получения пути к БД
в формате многобайтных символов.
- Добавлены doxygen-описания для API с широкими символами.
- Устранены предупреждения статического анализатора MSVC,
все они были несущественные, либо ложные.
- Устранено ложное предупреждение GCC при сборке для SH4.
- Добавлена поддержка ASAN (Address Sanitizer) при сборке посредством MSVC.
- Расширен набор перебираемых режимов в скрипте `test/long_stochastic.sh`,
добавлена опция `--extra`.
- В C++ API добавлена поддержка расширенных опций времени выполнения `mdbx::extra_runtime_option`,
аналогично `enum MDBX_option_t` из C API.
- Вывод всех счетчиков page-operations в `mdbx_stat`.
-------------------------------------------------------------------------------
## v0.12.3 (Акула) от 2023-01-07
Выпуск с существенными доработками и новой функциональностью в память о закрытом open-source
[проекте "Акула"](https://erigon.substack.com/p/winding-down-support-for-akula-project).
Добавлена prefault-запись, переделан контроль “некогерентности” unified page/buffer cache, изменена тактика слияния страниц и т.д.
Стало ещё быстрее, в некоторых сценариях вдвое.
```
20 files changed, 4508 insertions(+), 2928 deletions(-)
Signed-off-by: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
```
Благодарности:
- [Alex Sharov](https://t.me/AskAlexSharov) и команде [Erigon](https://github.com/ledgerwatch/erigon) за тестирование.
- [Simon Leier](https://t.me/leisim) за сообщение о сбоях и тестирование.
Новое:
- Использование адреса [https://libmdbx.dqdkfa.ru/dead-github](https://libmdbx.dqdkfa.ru/dead-github)
для отсылки к сохранённым в web.archive.org копиям ресурсов, уничтоженных администрацией Github.
- Реализована prefault-запись при выделении страниц для read-write отображений.
Это приводит к кратному снижению системных издержек и существенному увеличению
производительности в соответствующих сценариях использования, когда:
- размер БД и объём данных существенно больше ОЗУ;
- используется режим `MDBX_WRITEMAP`;
- не-мелкие транзакции (по ходу транзакции выделяется многие сотни или тысячи страниц).
В режиме `MDBX_WRITEMAP` выделение/переиспользование страниц приводит
к page-fault и чтению страницы с диска, даже если содержимое страницы
не нужно (будет перезаписано). Это является следствием работы подсистемы
виртуальной памяти, а штатный способ лечения через `MADV_REMOVE`
работает не на всех ФС и обычно дороже получаемой экономии.
Теперь в libmdbx используется "упреждающая запись" таких страниц,
которая на системах с [unified page cache](https://www.opennet.ru/base/dev/ubc.txt.html)
приводит к "вталкиванию" данных, устраняя необходимость чтения с диска при
обращении к такой странице памяти.
Новый функционал работает в согласованности с автоматическим управлением read-ahead
и кэшем статуса присутствия страниц в ОЗУ, посредством [mincore()](https://man7.org/linux/man-pages/man2/mincore.2.html).
- Добавлена опция `MDBX_opt_prefault_write_enable` для возможности принудительного
включения/выключения prefault-записи.
- Реализован динамический выбор между сквозной записью на диск и обычной записью
с последующим [fdatasync()](https://man7.org/linux/man-pages/man3/fdatasync.3p.html)
управляемый опцией `MDBX_opt_writethrough_threshold`.
В долговечных (durable) режимах данные на диск могут быть сброшены двумя способами:
- сквозной записью через файловый дескриптор открытый с `O_DSYNC`;
- обычной записью с последующим вызовом `fdatasync()`.
Первый способ выгоднее при записи малого количества страниц и/или если
канал взаимодействия с диском/носителем имеет близкую к нулю задержку.
Второй способ выгоднее если требуется записать много страниц и/или канал
взаимодействия имеет весомую задержку (датацентры, облака). Добавленная
опция `MDBX_opt_writethrough_threshold` позволяет во время выполнения
задать порог для динамического выбора способа записи в зависимости от
объема и конкретных условий использования.
- Автоматическая установка `MDBX_opt_rp_augment_limit` в зависимости от размера БД.
- Запрещение разного режима `MDBX_WRITEMAP` между процессами в режимах
с отложенной/ленивой записью, так как в этом случае невозможно
обеспечить сброс данных на диск во всех случаях на всех поддерживаемых платформах.
- Добавлена опция сборки `MDBX_MMAP_USE_MS_ASYNC` позволяющая отключить
использование системного вызова `msync(MS_ASYNC)`, в использовании
которого нет необходимости на подавляющем большинстве актуальных ОС.
По умолчанию `MDBX_MMAP_USE_MS_ASYNC=0` (выключено) на Linux и других
системах с unified page cache. Такое поведение (без использования
`msync(MS_ASYNC)`) соответствует неизменяемой (hardcoded) логике LMDB. В
результате, в простых/наивных бенчмарках, libmdbx опережает LMDB
примерно так же, как при реальном применении.
На всякий случай стоит еще раз отметить/напомнить, что на Windows
предположительно libmdbx будет отставать от LMDB в сценариях с
множеством мелких транзакций, так как libmdbx осознанно использует на
Windows файловые блокировки, которые медленные (плохо реализованы в ядре
ОС), но позволяют застраховать пользователей от массы неверных действий
приводящих к повреждению БД.
- Поддержка не-печатных имен для subDb.
- Добавлен явный выбор `tls_model("local-dynamic")` для обхода проблемы
`relocation R_X86_64_TPOFF32 against FOO cannot be used with -shared`
из-за ошибки в CLANG приводящей к использованию неверного режима `tls_model`.
- Изменение тактики слияния страниц при удалении.
Теперь слияние выполняется преимущественно с уже измененной/грязной страницей.
Если же справа и слева обе страницы с одинаковым статусом,
то с наименее заполненной, как прежде. В сценариях с массивным удалением
это позволяет увеличить производительность до 50%.
- Добавлен контроль отсутствия LCK-файлов с альтернативным именованием.
Исправления (без корректировок новых функций):
- Изменение размера отображения если это требуется для сброса данных на
диск при вызове `mdbx_env_sync()` из параллельного потока выполнения вне
работающей транзакции.
- Исправление регресса после коммита db72763de049d6e4546f838277fe83b9081ad1de от 2022-10-08
в логике возврата грязных страниц в режиме `MDBX_WRITEMAP`, из-за чего
освободившиеся страницы использовались не немедленно, а попадали в
retired-список совершаемой транзакции и происходил необоснованный рост
размера транзакции.
- Устранение SIGSEGV или ошибочного вызова `free()` в ситуациях
повторного открытия среды посредством `mdbx_env_open()`.
- Устранение ошибки совершенной в коммите fe20de136c22ed3bc4c6d3f673e79c106e824f60 от 2022-09-18,
в результате чего на Linux в режиме `MDBX_WRITEMAP` никогда не вызывался `msync()`.
Проблема существует только в релизе 0.12.2.
- Добавление подсчета грязных страниц в `MDBX_WRITEMAP` для предоставления посредством `mdbx_txn_info()`
актуальной информации об объеме изменений в процессе транзакций чтения-записи.
- Исправление несущественной опечатки в условиях `#if` определения порядка байт.
- Исправление сборки для случая `MDBX_PNL_ASCENDING=1`.
Ликвидация технических долгов и мелочи:
- Доработка поддержки авто-слияния записей GC внутри `page_alloc_slowpath()`.
- Устранение несущественных предупреждений Coverity.
- Использование единого курсора для поиска в GC.
- Переработка внутренних флагов связанных с выделением страниц из GC.
- Доработка подготовки резерва перед обновлением GC при включенном BigFoot.
- Оптимизация `pnl_merge()` для случаев неперекрывающихся объединяемых списков.
- Оптимизация поддержки отсортированного списка страниц в `dpl_append()`.
- Ускорение работы `mdbx_chk` при обработке пользовательских записей в `@MAIN`.
- Переработка LRU-отметок для спиллинга.
- Переработка контроля "некогерентности" Unified page cache для уменьшения накладных расходов.
- Рефакторинг и микрооптимизация.
-------------------------------------------------------------------------------
## v0.12.2 (Иван Ярыгин) от 2022-11-11
Выпуск с существенными доработками и новой функциональностью
в память о российском борце [Иване Сергеевиче Ярыгине](https://ru.wikipedia.org/wiki/Ярыгин,_Иван_Сергеевич).
На Олимпийских играх в Мюнхене в 1972 году Иван Ярыгин уложил всех соперников на лопатки,
суммарно затратив менее 9 минут. Этот рекорд никем не побит до сих пор.
```
64 files changed, 5573 insertions(+), 2510 deletions(-)
Signed-off-by: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
```
Новое:
- Поддержка всех основных опций при сборке посредством CMake.
- Требования к CMake понижены до версии 3.0.2 для возможности сборки для устаревших платформ.
- Добавлена возможность профилирования работы GC в сложных и/или нагруженных
сценариях (например Ethereum/Erigon). По-умолчанию соответствующий код отключен,
а для его активации необходимо указать опцию сборки `MDBX_ENABLE_PROFGC=1`.
- Добавлена функция `mdbx_env_warmup()` для "прогрева" БД с возможностью
закрепления страниц в памяти.
В утилиты `mdbx_chk`, `mdbx_copy` и `mdbx_dump` добавлены опции `-u` и `-U`
для активации соответствующего функционала.
- Отключение учета «грязных» страниц в не требующих этого режимах
(`MDBX_WRITEMAP` при `MDBX_AVOID_MSYNC=0`). Доработка позволяет снизить
накладные расходы и была запланирована давно, но откладывалась так как
требовала других изменений.
- Вытеснение из памяти (спиллинг) «грязных» страниц с учетом размера
large/overflow-страниц. Доработка позволяет корректно соблюдать политику
задаваемую опциями `MDBX_opt_txn_dp_limit`,
`MDBX_opt_spill_max_denominator`, `MDBX_opt_spill_min_denominator` и
была запланирована давно, но откладывалась так как требовала других
изменений.
- Для Windows в API добавлены UNICODE-зависимые определения макросов
`MDBX_DATANAME`, `MDBX_LOCKNAME` и `MDBX_LOCK_SUFFIX`.
- Переход на преимущественное использование типа `size_t` для
уменьшения накладных расходов на платформе Эльбрус.
- В API добавлены функции `mdbx_limits_valsize4page_max()` и
`mdbx_env_get_valsize4page_max()` возвращающие максимальный размер в
байтах значения, которое может быть размещено в одной
large/overflow-странице, а не последовательности из двух или более таких
страниц. Для таблиц с поддержкой дубликатов вынос значений на
large/overflow-страницы не поддерживается, поэтому результат совпадает с
`mdbx_limits_valsize_max()`.
- В API добавлены функции `mdbx_limits_pairsize4page_max()` и
`mdbx_env_get_pairsize4page_max()` возвращающие в байтах максимальный
суммарный размер пары ключ-значение для их размещения на одной листовой
странице, без выноса значения на отдельную large/overflow-страницу. Для
таблиц с поддержкой дубликатов вынос значений на large/overflow-страницы
не поддерживается, поэтому результат определяет максимальный/допустимый
суммарный размер пары ключ-значение.
- Реализовано использование асинхронной (overlapped) записи в Windows,
включая использование небуферизированного ввода-вывода и `WriteGather()`.
Это позволяет сократить накладные расходы и частично обойти проблемы
Windows с низкой производительностью ввода-вывода, включая большие
задержки `FlushFileBuffers()`. Новый код также обеспечивает консолидацию
записываемых регионов на всех платформах, а на Windows использование
событий (events) сведено к минимуму, одновременно с автоматическим
использованием `WriteGather()`. Поэтому ожидается существенное снижение
накладных расходов взаимодействия с ОС, а в Windows это ускорение, в
некоторых сценариях, может быть кратным в сравнении с LMDB.
- Добавлена опция сборки `MDBX_AVOID_MSYNC`, которая определяет
поведение libmdbx в режиме `MDBX_WRITEMAP` (когда данные изменяются
непосредственно в отображенных в ОЗУ страницах БД):
* Если `MDBX_AVOID_MSYNC=0` (по умолчанию на всех системах кроме Windows),
то (как прежде) сохранение данных выполняется посредством `msync()`,
либо `FlushViewOfFile()` на Windows. На платформах с полноценной
подсистемой виртуальной памяти и адекватным файловым вводом-выводом
это обеспечивает минимум накладных расходов (один системный вызов)
и максимальную производительность. Однако, на Windows приводит
к значительной деградации, в том числе из-за того что после
`FlushViewOfFile()` требуется также вызов `FlushFileBuffers()`
с массой проблем и суеты внутри ядра ОС.
* Если `MDBX_AVOID_MSYNC=1` (по умолчанию только на Windows), то
сохранение данных выполняется явной записью в файл каждой измененной
страницы БД. Это требует дополнительных накладных расходов, как
на отслеживание измененных страниц (ведение списков "грязных"
страниц), так и на системные вызовы для их записи.
Кроме этого, с точки зрения подсистемы виртуальной памяти ядра ОС,
страницы БД измененные в ОЗУ и явно записанные в файл, могут либо
оставаться "грязными" и быть повторно записаны ядром ОС позже,
либо требовать дополнительных накладных расходов для отслеживания
PTE (Page Table Entries), их модификации и дополнительного копирования
данных. Тем не менее, по имеющейся информации, на Windows такой путь
записи данных в целом обеспечивает более высокую производительность.
- Улучшение эвристики включения авто-слияния записей GC.
- Изменение формата LCK и семантики некоторых внутренних полей. Версии
libmdbx использующие разный формат не смогут работать с одной БД
одновременно, а только поочередно (LCK-файл переписывается при открытии
первым открывающим БД процессом).
- В `C++` API добавлены методы фиксации транзакции с получением информации
о задержках.
- Added `MDBX_HAVE_BUILTIN_CPU_SUPPORTS` build option to control the use of GCC's
`__builtin_cpu_supports()` function, which could be unavailable on fake
OSes (macos, ios, android, etc).
Исправления (без корректировок вышеперечисленных новых функций):
- Устранение ряда предупреждений при сборке посредством MinGW.
- Устранение ложно-положительных сообщений от Valgrind об использовании
не инициализированных данных из-за выравнивающих зазоров в `struct troika`.
- Исправлен возврат неожиданной ошибки `MDBX_BUSY` из функций `mdbx_env_set_option()`,
`mdbx_env_set_syncbytes()` и `mdbx_env_set_syncperiod()`.
- Небольшие исправления для совместимости с CMake 3.8
- Больше контроля и осторожности (паранойи) для страховки от дефектов `mremap()`.
- Костыль для починки сборки со старыми версиями `stdatomic.h` из GNU Lib C,
где макросы `ATOMIC_*_LOCK_FREE` ошибочно переопределяются через функции.
- Использование `fcntl64(F_GETLK64/F_SETLK64/F_SETLKW64)` при наличии.
Это решает проблему срабатывания проверочного утверждения при сборке для
платформ где тип `off_t` шире соответствующих полей структуры `flock`,
используемой для блокировки файлов.
- Доработан сбор информации о задержках при фиксации транзакций:
* Устранено искажение замеров длительности обновления GC
при включении отладочного внутреннего аудита;
* Защита от underflow-нуля только общей задержки в метриках,
чтобы исключить ситуации, когда сумма отдельных стадий
больше общей длительности.
- Ряд исправлений для устранения срабатываний проверочных утверждений в
отладочных сборках.
- Более осторожное преобразование к типу `mdbx_tid_t` для устранения
предупреждений.
- Исправление лишнего сброса данных на диск в режиме `MDBX_SAFE_NOSYNC`
при обновлении GC.
- Fixed an extra check for `MDBX_APPENDDUP` inside `mdbx_cursor_put()`
which could result in returning `MDBX_EKEYMISMATCH` for valid cases.
- Fixed nasty `clz()` bug (by using `_BitScanReverse()`, only MSVC builds affected).
Мелочи:
- Исторические ссылки связанные с удалённым на ~~github~~ проектом перенаправлены на [web.archive.org](https://web.archive.org/web/https://github.com/erthink/libmdbx).
- Синхронизированы конструкции CMake между проектами.
- Добавлено предупреждение о небезопасности RISC-V.
- Добавлено описание параметров `MDBX_debug_func` и `MDBX_debug_func_nofmt`.
- Добавлено обходное решение для минимизации ложно-положительных
конфликтов при использовании файловых блокировок в Windows.
- Проверка атомарности C11-операций c 32/64-битными данными.
- Уменьшение в 42 раза значения по-умолчанию для `me_options.dp_limit`
в отладочных сборках.
- Добавление платформы `gcc-riscv64-linux-gnu` в список для цели `cross-gcc`.
- Небольшие правки скрипта `long_stochastic.sh` для работы в Windows.
- Удаление ненужного вызова `LockFileEx()` внутри `mdbx_env_copy()`.
- Добавлено описание использования файловых дескрипторов в различных режимах.
- Добавлено использование `_CrtDbgReport()` в отладочных сборках.
- Fixed an extra ensure/assertion check of `oldest_reader` inside `txn_end()`.
- Removed description of deprecated usage of `MDBX_NODUPDATA`.
- Fixed regression of ASAN/Valgrind-enabled builds.
- Fixed minor MinGW warning.
-------------------------------------------------------------------------------
## v0.12.1 (Positive Proxima) at 2022-08-24
The planned frontward release with new superior features on the day of the 20th anniversary of [Positive Technologies](https://ptsecurity.com).
```
37 files changed, 7604 insertions(+), 7417 deletions(-)
Signed-off-by: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
```
New:
- The `Big Foot` feature which significantly reduces GC overhead for processing large lists of retired pages from huge transactions.
Now _libmdbx_ avoids creating large chunks of PNLs (page number lists) which required long sequences of free pages, aka large/overflow pages.
Thus avoiding searching, allocating and storing such sequences inside GC.
- Improved hot/online validation and checking of database pages both for more robustness and performance.
- New solid and fast method to latch meta-pages called `Troika`.
The minimum of memory barriers, reads, comparisons and conditional transitions are used.
- New `MDBX_VALIDATION` environment option for extra validation of DB structure and page content, for careful/safe handling of damaged or untrusted DBs.
- Accelerated ×16/×8/×4 by AVX512/AVX2/SSE2/Neon implementations of search page sequences.
- Added the `gcrtime_seconds16dot16` counter to the "Page Operation Statistics" that accumulates time spent for GC searching and reclaiming.
- Copy-with-compactification now clears/zeroes unused gaps inside database pages.
- The `C` and `C++` APIs have been extended and/or refined to simplify using `wchar_t` pathnames.
On Windows the `mdbx_env_openW()`, `mdbx_env_get_pathW()`, `mdbx_env_copyW()`, `mdbx_env_open_for_recoveryW()` are available for now,
but the `mdbx_env_get_path()` has been replaced in favor of `mdbx_env_get_pathW()`.
- Added explicit error message for Buildroot's Microblaze toolchain maintainers.
- Added `MDBX_MANAGE_BUILD_FLAGS` build options for CMake.
- Speed-up internal `bsearch`/`lower_bound` implementation using branchless tactic, including workaround for CLANG x86 optimiser bug.
- A lot of internal refinements and micro-optimisations.
- Internally counted volume of dirty pages (unused for now but for coming features).
Fixes:
- backport: Fixed insignificant typo of `||` inside `#if` byte-order condition.
- backport: Fixed `SIGSEGV` or an erroneous call to `free()` in situations where
errors occur when reopening by `mdbx_env_open()` of a previously used
environment.
- backport: Fixed `cursor_put_nochecklen()` internals for case when dupsort'ed named subDb
contains a single key with multiple values (aka duplicates), which are replaced
with a single value by put-operation with the `MDBX_UPSERT+MDBX_ALLDUPS` flags.
In this case, the database becomes completely empty, without any pages.
However exactly this condition was not considered and thus wasn't handled correctly.
See [issue#8](https://gitflic.ru/project/erthink/libmdbx/issue/8) for more information.
- backport: Fixed extra assertion inside `override_meta()`, which could
lead to false-positive failing of the assertion in a debug builds during
DB recovery and auto-rollback.
- backport: Refined the `__cold`/`__hot` macros to avoid the
`error: inlining failed in call to always_inline FOO(...): target specific option mismatch`
issue during build using GCC >10.x for SH4 arch.
Minors:
- backport: Using the https://libmdbx.dqdkfa.ru/dead-github
for resources deleted by the Github' administration.
- backport: Fixed English typos.
- backport: Fixed proto of `__asan_default_options()`.
- backport: Fixed doxygen-description of C++ API, especially of C++20 concepts.
- backport: Refined `const` and `noexcept` for few C++ API methods.
- backport: Fixed copy&paste typo of "Getting started".
- backport: Update MithrilDB status.
- backport: Resolve false-positive `used uninitialized` warning from GCC >10.x
while build for SH4 arch.
- Never use modern `__cxa_thread_atexit()` on Apple's OSes.
- Don't check owner for finished transactions.
- Fixed typo in `MDBX_EINVAL` which breaks MinGW builds with CLANG.
## v0.11.13 (Swashplate) at 2022-11-10
## v0.12.0 at 2022-06-19
Not a release but preparation for changing feature set and API.
-------------------------------------------------------------------------------
## v0.11.13 (Swashplate) at 2022-11-10
The stable bugfix release in memory of [Boris Yuryev](https://ru.wikipedia.org/wiki/Юрьев,_Борис_Николаевич) on his 133rd birthday.
@ -113,13 +507,15 @@ Fixes:
- Fixed derived C++ builds by removing `MDBX_INTERNAL_FUNC` for `mdbx_w2mb()` and `mdbx_mb2w()`.
-------------------------------------------------------------------------------
## v0.11.10 (the TriColor) at 2022-08-22
The stable bugfix release.
```
14 files changed, 263 insertions(+), 252 deletions(-)
Signed-off-by: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
```
New:
- The C++ API has been refined to simplify support for `wchar_t` in path names.
@ -141,8 +537,6 @@ Minors:
- Minor clarified `iov_page()` failure case.
-------------------------------------------------------------------------------
## v0.11.9 (Чирчик-1992) at 2022-08-02
@ -259,7 +653,7 @@ New:
- Support build by MinGW' make from command line without CMake.
- Added `mdbx::filesystem` C++ API namespace that corresponds to `std::filesystem` or `std::experimental::filesystem`.
- Created [website](https://libmdbx.dqdkfa.ru/) for online auto-generated documentation.
- Used `https://web.archive.org/web/20220414235959/https://github.com/erthink/` for dead (or temporarily lost) resources deleted by ~~Github~~.
- Used `https://web.archive.org/web/https://github.com/erthink/libmdbx` for dead (or temporarily lost) resources deleted by ~~Github~~.
- Added `--loglevel=` command-line option to the `mdbx_test` tool.
- Added few fast smoke-like tests into CMake builds.

View File

@ -55,6 +55,7 @@ LD ?= ld
# build options
MDBX_BUILD_OPTIONS ?=-DNDEBUG=1
MDBX_BUILD_TIMESTAMP ?=$(shell date +%Y-%m-%dT%H:%M:%S%z)
MDBX_BUILD_CXX ?= YES
# probe and compose common compiler flags with variable expansion trick (seems this work two times per session for GNU Make 3.81)
CFLAGS ?= $(strip $(eval CFLAGS := -std=gnu11 -O2 -g -Wall -Werror -Wextra -Wpedantic -ffunction-sections -fPIC -fvisibility=hidden -pthread -Wno-error=attributes $$(shell for opt in -fno-semantic-interposition -Wno-unused-command-line-argument -Wno-tautological-compare; do [ -z "$$$$($(CC) '-DMDBX_BUILD_FLAGS="probe"' $$$${opt} -c $(SRC_PROBE_C) -o /dev/null >/dev/null 2>&1 || echo failed)" ] && echo "$$$${opt} "; done)$(CFLAGS_EXTRA))$(CFLAGS))
@ -127,6 +128,9 @@ TIP := // TIP:
.PHONY: all help options lib libs tools clean install uninstall check_buildflags_tag tools-static
.PHONY: install-strip install-no-strip strip libmdbx mdbx show-options lib-static lib-shared
boolean = $(if $(findstring $(strip $($1)),YES Yes yes y ON On on 1 true True TRUE),1,$(if $(findstring $(strip $($1)),NO No no n OFF Off off 0 false False FALSE),,$(error Wrong value `$($1)` of $1 for YES/NO option)))
select_by = $(if $(call boolean,$(1)),$(2),$(3))
ifeq ("$(origin V)", "command line")
MDBX_BUILD_VERBOSE := $(V)
endif
@ -134,7 +138,7 @@ ifndef MDBX_BUILD_VERBOSE
MDBX_BUILD_VERBOSE := 0
endif
ifeq ($(MDBX_BUILD_VERBOSE),1)
ifeq ($(call boolean,MDBX_BUILD_VERBOSE),1)
QUIET :=
HUSH :=
$(info $(TIP) Use `make V=0` for quiet.)
@ -193,12 +197,12 @@ help:
show-options:
@echo " MDBX_BUILD_OPTIONS = $(MDBX_BUILD_OPTIONS)"
@echo " MDBX_BUILD_CXX = $(MDBX_BUILD_CXX)"
@echo " MDBX_BUILD_TIMESTAMP = $(MDBX_BUILD_TIMESTAMP)"
@echo '$(TIP) Use `make options` to listing available build options.'
@echo " CC =`which $(CC)` | `$(CC) --version | head -1`"
@echo " CFLAGS =$(CFLAGS)"
@echo " CXXFLAGS =$(CXXFLAGS)"
@echo " LDFLAGS =$(LDFLAGS) $(LIB_STDCXXFS) $(LIBS) $(EXE_LDFLAGS)"
@echo $(call select_by,MDBX_BUILD_CXX," CXX =`which $(CXX)` | `$(CXX) --version | head -1`"," CC =`which $(CC)` | `$(CC) --version | head -1`")
@echo $(call select_by,MDBX_BUILD_CXX," CXXFLAGS =$(CXXFLAGS)"," CFLAGS =$(CFLAGS)")
@echo $(call select_by,MDBX_BUILD_CXX," LDFLAGS =$(LDFLAGS) $(LIB_STDCXXFS) $(LIBS) $(EXE_LDFLAGS)"," LDFLAGS =$(LDFLAGS) $(LIBS) $(EXE_LDFLAGS)")
@echo '$(TIP) Use `make help` to listing available targets.'
options:
@ -254,7 +258,7 @@ clean:
config.h src/config.h src/version.c *.tar* buildflags.tag \
mdbx_*.static mdbx_*.static-lto
MDBX_BUILD_FLAGS =$(strip $(MDBX_BUILD_OPTIONS) $(CXXSTD) $(CFLAGS) $(LDFLAGS) $(LIBS))
MDBX_BUILD_FLAGS =$(strip MDBX_BUILD_CXX=$(MDBX_BUILD_CXX) $(MDBX_BUILD_OPTIONS) $(call select_by,MDBX_BUILD_CXX,$(CXXFLAGS) $(LDFLAGS) $(LIB_STDCXXFS) $(LIBS),$(CFLAGS) $(LDFLAGS) $(LIBS)))
check_buildflags_tag:
$(QUIET)if [ "$(MDBX_BUILD_FLAGS)" != "$$(cat buildflags.tag 2>&1)" ]; then \
echo -n " CLEAN for build with specified flags..." && \
@ -264,13 +268,13 @@ check_buildflags_tag:
buildflags.tag: check_buildflags_tag
lib-static libmdbx.a: mdbx-static.o mdbx++-static.o
lib-static libmdbx.a: mdbx-static.o $(call select_by,MDBX_BUILD_CXX,mdbx++-static.o)
@echo ' AR $@'
$(QUIET)$(AR) rcs $@ $? $(HUSH)
lib-shared libmdbx.$(SO_SUFFIX): mdbx-dylib.o mdbx++-dylib.o
lib-shared libmdbx.$(SO_SUFFIX): mdbx-dylib.o $(call select_by,MDBX_BUILD_CXX,mdbx++-dylib.o)
@echo ' LD $@'
$(QUIET)$(CXX) $(CXXFLAGS) $^ -pthread -shared $(LDFLAGS) $(LIB_STDCXXFS) $(LIBS) -o $@
$(QUIET)$(call select_by,MDBX_BUILD_CXX,$(CXX) $(CXXFLAGS),$(CC) $(CFLAGS)) $^ -pthread -shared $(LDFLAGS) $(call select_by,MDBX_BUILD_CXX,$(LIB_STDCXXFS)) $(LIBS) -o $@
#> dist-cutoff-begin
ifeq ($(wildcard mdbx.c),mdbx.c)
@ -349,9 +353,9 @@ TEST_DB ?= $(shell [ -d /dev/shm ] && echo /dev/shm || echo /tmp)/mdbx-test.d
TEST_LOG ?= $(shell [ -d /dev/shm ] && echo /dev/shm || echo /tmp)/mdbx-test.log
TEST_OSAL := $(shell $(uname2osal))
TEST_ITER := $(shell $(uname2titer))
TEST_SRC := test/osal-$(TEST_OSAL).cc $(filter-out $(wildcard test/osal-*.cc), $(wildcard test/*.cc))
TEST_INC := $(wildcard test/*.h)
TEST_OBJ := $(patsubst %.cc,%.o,$(TEST_SRC))
TEST_SRC := test/osal-$(TEST_OSAL).c++ $(filter-out $(wildcard test/osal-*.c++),$(wildcard test/*.c++)) $(call select_by,MDBX_BUILD_CXX,,src/mdbx.c++)
TEST_INC := $(wildcard test/*.h++)
TEST_OBJ := $(patsubst %.c++,%.o,$(TEST_SRC))
TAR ?= $(shell which gnu-tar || echo tar)
ZIP ?= $(shell which zip || echo "echo 'Please install zip'")
CLANG_FORMAT ?= $(shell (which clang-format-14 || which clang-format-13 || which clang-format) 2>/dev/null)
@ -359,7 +363,7 @@ CLANG_FORMAT ?= $(shell (which clang-format-14 || which clang-format-13 || which
reformat:
@echo ' RUNNING clang-format...'
$(QUIET)if [ -n "$(CLANG_FORMAT)" ]; then \
git ls-files | grep -E '\.(c|cxx|cc|cpp|h|hxx|hpp)(\.in)?$$' | xargs -r $(CLANG_FORMAT) -i --style=file; \
git ls-files | grep -E '\.(c|c\+\+|h|h\+\+)(\.in)?$$' | xargs -r $(CLANG_FORMAT) -i --style=file; \
else \
echo "clang-format version 13..14 not found for 'reformat'"; \
fi
@ -382,11 +386,11 @@ MDBX_SMOKE_EXTRA ?=
check: DESTDIR = $(shell pwd)/@check-install
check: test dist install
smoke-assertion: MDBX_BUILD_OPTIONS=-DMDBX_FORCE_ASSERTIONS=1
smoke-assertion: MDBX_BUILD_OPTIONS:=$(strip $(MDBX_BUILD_OPTIONS) -DMDBX_FORCE_ASSERTIONS=1)
smoke-assertion: smoke
test-assertion: MDBX_BUILD_OPTIONS=-DMDBX_FORCE_ASSERTIONS=1
# `test-assertion` runs the regular `test` target with assertions forced on.
test-assertion: MDBX_BUILD_OPTIONS:=$(strip $(MDBX_BUILD_OPTIONS) -DMDBX_FORCE_ASSERTIONS=1)
test-assertion: test
long-test-assertion: MDBX_BUILD_OPTIONS=-DMDBX_FORCE_ASSERTIONS=1
# `long-test-assertion` runs the `long-test` target with assertions forced on.
long-test-assertion: MDBX_BUILD_OPTIONS:=$(strip $(MDBX_BUILD_OPTIONS) -DMDBX_FORCE_ASSERTIONS=1)
long-test-assertion: long-test
smoke: build-test
@ -414,7 +418,7 @@ smoke-fault: build-test
test: build-test
@echo ' RUNNING `test/long_stochastic.sh --loops 2`...'
$(QUIET)test/long_stochastic.sh --dont-check-ram-size --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG)
$(QUIET)test/long_stochastic.sh --dont-check-ram-size --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false)
long-test: build-test
@echo ' RUNNING `test/long_stochastic.sh --loops 42`...'
@ -422,12 +426,12 @@ long-test: build-test
test-singleprocess: build-test
@echo ' RUNNING `test/long_stochastic.sh --single --loops 2`...'
$(QUIET)test/long_stochastic.sh --dont-check-ram-size --single --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG)
$(QUIET)test/long_stochastic.sh --dont-check-ram-size --single --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false)
test-valgrind: CFLAGS_EXTRA=-Ofast -DMDBX_USE_VALGRIND
test-valgrind: build-test
@echo ' RUNNING `test/long_stochastic.sh --with-valgrind --loops 2`...'
$(QUIET)test/long_stochastic.sh --with-valgrind --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG)
$(QUIET)test/long_stochastic.sh --with-valgrind --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false)
memcheck: VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt
memcheck: CFLAGS_EXTRA=-Ofast -DMDBX_USE_VALGRIND
@ -448,7 +452,7 @@ gcc-analyzer:
test-ubsan:
@echo ' RE-TEST with `-fsanitize=undefined` option...'
$(QUIET)$(MAKE) IOARENA=false CXXSTD=$(CXXSTD) CFLAGS_EXTRA="-Ofast -fsanitize=undefined -fsanitize-undefined-trap-on-error" test
$(QUIET)$(MAKE) IOARENA=false CXXSTD=$(CXXSTD) CFLAGS_EXTRA="-DENABLE_UBSAN -Ofast -fsanitize=undefined -fsanitize-undefined-trap-on-error" test
test-asan:
@echo ' RE-TEST with `-fsanitize=address` option...'
@ -465,7 +469,7 @@ mdbx_example: mdbx.h example/example-mdbx.c libmdbx.$(SO_SUFFIX)
build-test: all mdbx_example mdbx_test
define test-rule
$(patsubst %.cc,%.o,$(1)): $(1) $(TEST_INC) $(HEADERS) $(lastword $(MAKEFILE_LIST))
$(patsubst %.c++,%.o,$(1)): $(1) $(TEST_INC) $(HEADERS) $(lastword $(MAKEFILE_LIST))
@echo ' CC $$@'
$(QUIET)$$(CXX) $$(CXXFLAGS) $$(MDBX_BUILD_OPTIONS) -c $(1) -o $$@
@ -711,23 +715,23 @@ endif
################################################################################
# Cross-compilation simple test
CROSS_LIST = mips-linux-gnu-gcc \
CROSS_LIST = \
mips64-linux-gnuabi64-gcc mips-linux-gnu-gcc \
hppa-linux-gnu-gcc s390x-linux-gnu-gcc \
powerpc64-linux-gnu-gcc powerpc-linux-gnu-gcc \
arm-linux-gnueabihf-gcc aarch64-linux-gnu-gcc \
sh4-linux-gnu-gcc mips64-linux-gnuabi64-gcc \
hppa-linux-gnu-gcc s390x-linux-gnu-gcc
arm-linux-gnueabihf-gcc aarch64-linux-gnu-gcc
## On Ubuntu Focal (20.04) with QEMU 4.2 (1:4.2-3ubuntu6.6) & GCC 9.3 (9.3.0-17ubuntu1~20.04)
# hppa-linux-gnu-gcc - works (previously: don't supported by qemu)
# s390x-linux-gnu-gcc - works (previously: qemu hang/abort)
## On Ubuntu Jammy (22.04) with QEMU 6.2 (1:6.2+dfsg-2ubuntu6.6) & GCC 11.3 (11.3.0-1ubuntu1~22.04)
# sh4-linux-gnu-gcc - coredump (qemu mmap-troubles)
# sparc64-linux-gnu-gcc - coredump (qemu mmap-troubles, previously: qemu fails fcntl for F_SETLK/F_GETLK)
# alpha-linux-gnu-gcc - coredump (qemu mmap-troubles)
CROSS_LIST_NOQEMU = sparc64-linux-gnu-gcc alpha-linux-gnu-gcc
# riscv64-linux-gnu-gcc - coredump (qemu fails fcntl for F_SETLK/F_GETLK)
CROSS_LIST_NOQEMU = sh4-linux-gnu-gcc sparc64-linux-gnu-gcc alpha-linux-gnu-gcc riscv64-linux-gnu-gcc
cross-gcc:
@echo ' Re-building by cross-compiler for: $(CROSS_LIST_NOQEMU) $(CROSS_LIST)'
@echo "CORRESPONDING CROSS-COMPILERs ARE REQUIRED."
@echo "FOR INSTANCE: apt install g++-aarch64-linux-gnu g++-alpha-linux-gnu g++-arm-linux-gnueabihf g++-hppa-linux-gnu g++-mips-linux-gnu g++-mips64-linux-gnuabi64 g++-powerpc-linux-gnu g++-powerpc64-linux-gnu g++-s390x-linux-gnu g++-sh4-linux-gnu g++-sparc64-linux-gnu"
@echo "FOR INSTANCE: sudo apt install \$$(apt list 'g++-*' | grep 'g++-[a-z0-9]\+-linux-gnu/' | cut -f 1 -d / | sort -u)"
$(QUIET)for CC in $(CROSS_LIST_NOQEMU) $(CROSS_LIST); do \
echo "===================== $$CC"; \
$(MAKE) IOARENA=false CXXSTD= clean && CC=$$CC CXX=$$(echo $$CC | sed 's/-gcc/-g++/') EXE_LDFLAGS=-static $(MAKE) IOARENA=false all || exit $$?; \
@ -739,8 +743,8 @@ cross-qemu:
@echo ' Re-building by cross-compiler and re-check by QEMU for: $(CROSS_LIST)'
@echo "CORRESPONDING CROSS-COMPILERs AND QEMUs ARE REQUIRED."
@echo "FOR INSTANCE: "
@echo " 1) apt install g++-aarch64-linux-gnu g++-alpha-linux-gnu g++-arm-linux-gnueabihf g++-hppa-linux-gnu g++-mips-linux-gnu g++-mips64-linux-gnuabi64 g++-powerpc-linux-gnu g++-powerpc64-linux-gnu g++-s390x-linux-gnu g++-sh4-linux-gnu g++-sparc64-linux-gnu"
@echo " 2) apt install binfmt-support qemu-user-static qemu-user qemu-system-arm qemu-system-mips qemu-system-misc qemu-system-ppc qemu-system-sparc"
@echo " 1) sudo apt install \$$(apt list 'g++-*' | grep 'g++-[a-z0-9]\+-linux-gnu/' | cut -f 1 -d / | sort -u)"
@echo " 2) sudo apt install binfmt-support qemu-user-static qemu-user \$$(apt list 'qemu-system-*' | grep 'qemu-system-[a-z0-9]\+/' | cut -f 1 -d / | sort -u)"
$(QUIET)for CC in $(CROSS_LIST); do \
echo "===================== $$CC + qemu"; \
$(MAKE) IOARENA=false CXXSTD= clean && \
@ -784,7 +788,7 @@ IOARENA := $(shell \
(test -x ../ioarena/@BUILD/src/ioarena && echo ../ioarena/@BUILD/src/ioarena) || \
(test -x ../../@BUILD/src/ioarena && echo ../../@BUILD/src/ioarena) || \
(test -x ../../src/ioarena && echo ../../src/ioarena) || which ioarena 2>&- || \
(echo false && echo '$(TIP) Clone and build the https://github.com/pmwkaa/ioarena.git within a neighbouring directory for availability of benchmarking.' >&2))
(echo false && echo '$(TIP) Clone and build the https://abf.io/erthink/ioarena.git within a neighbouring directory for availability of benchmarking.' >&2))
endif
NN ?= 25000000
BENCH_CRUD_MODE ?= nosync
@ -798,7 +802,7 @@ re-bench: bench-clean bench
ifeq ($(or $(IOARENA),false),false)
bench bench-quartet bench-triplet bench-couple:
$(QUIET)echo 'The `ioarena` benchmark is required.' >&2 && \
echo 'Please clone and build the https://github.com/pmwkaa/ioarena.git within a neighbouring `ioarena` directory.' >&2 && \
echo 'Please clone and build the https://abf.io/erthink/ioarena.git within a neighbouring `ioarena` directory.' >&2 && \
false
else
@ -809,15 +813,20 @@ define bench-rule
bench-$(1)_$(2).txt: $(3) $(IOARENA) $(lastword $(MAKEFILE_LIST))
@echo ' RUNNING ioarena for $1/$2...'
$(QUIET)(export LD_LIBRARY_PATH="./:$$$${LD_LIBRARY_PATH}"; \
ldd $(IOARENA) && \
ldd $(IOARENA) | grep -i $(1) && \
$(IOARENA) -D $(1) -B batch -m $(BENCH_CRUD_MODE) -n $(2) \
| tee $$@ | grep throughput | sed 's/throughput/batch×N/' && \
$(IOARENA) -D $(1) -B crud -m $(BENCH_CRUD_MODE) -n $(2) \
| tee $$@ | grep throughput && \
| tee -a $$@ | grep throughput | sed 's/throughput/ crud/' && \
$(IOARENA) -D $(1) -B iterate,get,iterate,get,iterate -m $(BENCH_CRUD_MODE) -r 4 -n $(2) \
| tee -a $$@ | grep throughput \
) || mv -f $$@ $$@.error
| tee -a $$@ | grep throughput | sed '0,/throughput/{s/throughput/iterate/};s/throughput/ get/' && \
$(IOARENA) -D $(1) -B delete -m $(BENCH_CRUD_MODE) -n $(2) \
| tee -a $$@ | grep throughput | sed 's/throughput/ delete/' && \
true) || mv -f $$@ $$@.error
endef
$(eval $(call bench-rule,mdbx,$(NN),libmdbx.$(SO_SUFFIX)))
$(eval $(call bench-rule,sophia,$(NN)))

View File

@ -277,7 +277,7 @@ the user's point of view.
> and up to 30% faster when _libmdbx_ is compiled with specific build options
> which downgrade several runtime checks to match LMDB behaviour.
>
> These and other results could be easily reproduced with [ioArena](https://github.com/pmwkaa/ioarena) just by `make bench-quartet` command,
> These and other results could be easily reproduced with [ioArena](https://abf.io/erthink/ioarena.git) just by `make bench-quartet` command,
> including comparisons with [RocksDB](https://en.wikipedia.org/wiki/RocksDB)
> and [WiredTiger](https://en.wikipedia.org/wiki/WiredTiger).
@ -382,7 +382,7 @@ named mutexes are used.
Historically, _libmdbx_ is a deeply revised and extended descendant of the
[Lightning Memory-Mapped Database](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database).
At first the development was carried out within the
[ReOpenLDAP](https://web.archive.org/web/20220414235959/https://github.com/erthink/ReOpenLDAP) project. About a
[ReOpenLDAP](https://web.archive.org/web/https://github.com/erthink/ReOpenLDAP) project. About a
year later _libmdbx_ was separated into a standalone project, which was
[presented at Highload++ 2015
conference](http://www.highload.ru/2015/abstracts/1831.html).
@ -659,7 +659,7 @@ Bindings
Performance comparison
======================
All benchmarks were done in 2015 by [IOArena](https://github.com/pmwkaa/ioarena)
All benchmarks were done in 2015 by [IOArena](https://abf.io/erthink/ioarena.git)
and multiple [scripts](https://github.com/pmwkaa/ioarena/tree/HL%2B%2B2015)
runs on Lenovo Carbon-2 laptop, i7-4600U 2.1 GHz (2 physical cores, 4 HyperThreading cores), 8 Gb RAM,
SSD SAMSUNG MZNTD512HAGL-000L1 (DXT23L0Q) 512 Gb.

12
TODO.md
View File

@ -11,15 +11,19 @@ For the same reason ~~Github~~ is blacklisted forever.
So currently most of the links are broken due to noted malicious ~~Github~~ sabotage.
- [Engage an "overlapped I/O" on Windows](https://libmdbx.dqdkfa.ru/dead-github/issues/224).
- [Simple careful mode for working with corrupted DB](https://libmdbx.dqdkfa.ru/dead-github/issues/223).
- [Move most of `mdbx_chk` functional to the library API](https://libmdbx.dqdkfa.ru/dead-github/issues/204).
- [Replace SRW-lock on Windows to allow shrink DB with `MDBX_NOTLS` option](https://libmdbx.dqdkfa.ru/dead-github/issues/210).
- [More flexible support of asynchronous runtime/framework(s)](https://libmdbx.dqdkfa.ru/dead-github/issues/200).
- [Migration guide from LMDB to MDBX](https://libmdbx.dqdkfa.ru/dead-github/issues/199).
- [Get rid of dirty-pages list in MDBX_WRITEMAP mode](https://libmdbx.dqdkfa.ru/dead-github/issues/193).
- [Large/Overflow pages accounting for dirty-room](https://libmdbx.dqdkfa.ru/dead-github/issues/192).
- [Support for RAW devices](https://libmdbx.dqdkfa.ru/dead-github/issues/124).
- [Support MessagePack for Keys & Values](https://libmdbx.dqdkfa.ru/dead-github/issues/115).
- [Engage new terminology](https://libmdbx.dqdkfa.ru/dead-github/issues/137).
- Packages for [Astra Linux](https://astralinux.ru/), [ALT Linux](https://www.altlinux.org/), [ROSA Linux](https://www.rosalinux.ru/), etc.
Done
----
- [Simple careful mode for working with corrupted DB](https://libmdbx.dqdkfa.ru/dead-github/issues/223).
- [Engage an "overlapped I/O" on Windows](https://libmdbx.dqdkfa.ru/dead-github/issues/224).
- [Large/Overflow pages accounting for dirty-room](https://libmdbx.dqdkfa.ru/dead-github/issues/192).
- [Get rid of dirty-pages list in MDBX_WRITEMAP mode](https://libmdbx.dqdkfa.ru/dead-github/issues/193).

View File

@ -1,4 +1,4 @@
## Copyright (c) 2012-2022 Leonid Yuriev <leo@yuriev.ru>.
## Copyright (c) 2012-2023 Leonid Yuriev <leo@yuriev.ru>.
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
@ -348,6 +348,8 @@ endif()
if(MSVC)
check_compiler_flag("/WX" CC_HAS_WERROR)
check_compiler_flag("/fsanitize=address" CC_HAS_ASAN)
check_compiler_flag("/fsanitize=undefined" CC_HAS_UBSAN)
else()
#
# GCC started to warn for unused result starting from 4.2, and
@ -839,19 +841,26 @@ macro(setup_compile_flags)
endif()
if(ENABLE_ASAN)
if(NOT MSVC)
add_compile_flags("C;CXX" "-fsanitize=address")
else()
add_compile_flags("C;CXX" "/fsanitize=address")
endif()
add_definitions(-DASAN_ENABLED=1)
endif()
if(ENABLE_UBSAN)
if(NOT MSVC)
add_compile_flags("C;CXX" "-fsanitize=undefined" "-fsanitize-undefined-trap-on-error")
else()
add_compile_flags("C;CXX" "/fsanitize=undefined")
endif()
add_definitions(-DUBSAN_ENABLED=1)
endif()
if(ENABLE_GCOV)
if(NOT HAVE_GCOV)
message(FATAL_ERROR
"ENABLE_GCOV option requested but gcov library is not found")
message(FATAL_ERROR "ENABLE_GCOV option requested but gcov library is not found")
endif()
add_compile_flags("C;CXX" "-fprofile-arcs" "-ftest-coverage")

View File

@ -1,4 +1,4 @@
## Copyright (c) 2012-2022 Leonid Yuriev <leo@yuriev.ru>.
## Copyright (c) 2012-2023 Leonid Yuriev <leo@yuriev.ru>.
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.

View File

@ -1,4 +1,4 @@
## Copyright (c) 2012-2022 Leonid Yuriev <leo@yuriev.ru>.
## Copyright (c) 2012-2023 Leonid Yuriev <leo@yuriev.ru>.
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.

View File

@ -4,7 +4,7 @@
*/
/*
* Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>.
* Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>.
* Copyright 2017 Ilya Shipitsin <chipitsine@gmail.com>.
* Copyright 2012-2015 Howard Chu, Symas Corp.
* All rights reserved.

View File

@ -4,7 +4,7 @@
*/
/*
* Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>.
* Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>.
* Copyright 2012-2015 Howard Chu, Symas Corp.
* Copyright 2015,2016 Peter-Service R&D LLC.
* All rights reserved.

643
mdbx.h
View File

@ -25,7 +25,7 @@ _The Future will (be) [Positive](https://www.ptsecurity.com). Всё будет
\section copyright LICENSE & COPYRIGHT
\authors Copyright (c) 2015-2022, Leonid Yuriev <leo@yuriev.ru>
\authors Copyright (c) 2015-2023, Leonid Yuriev <leo@yuriev.ru>
and other _libmdbx_ authors: please see [AUTHORS](./AUTHORS) file.
\copyright Redistribution and use in source and binary forms, with or without
@ -77,10 +77,10 @@ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#if defined(__riscv) || defined(__riscv__) || defined(__RISCV) || \
defined(__RISCV__)
#warning The RISC-V architecture is intentionally insecure by design. \
#warning "The RISC-V architecture is intentionally insecure by design. \
Please delete this admonition at your own risk, \
if you make such decision informed and consciously. \
Refer to https://clck.ru/32d9xH for more information.
Refer to https://clck.ru/32d9xH for more information."
#endif /* RISC-V */
#ifdef _MSC_VER
@ -634,9 +634,9 @@ typedef mode_t mdbx_mode_t;
extern "C" {
#endif
/* MDBX version 0.11.x */
/* MDBX version 0.12.x */
#define MDBX_VERSION_MAJOR 0
#define MDBX_VERSION_MINOR 11
#define MDBX_VERSION_MINOR 12
#ifndef LIBMDBX_API
#if defined(LIBMDBX_EXPORTS)
@ -835,18 +835,48 @@ enum MDBX_constants {
#ifndef MDBX_LOCKNAME
/** \brief The name of the lock file in the environment
* without using \ref MDBX_NOSUBDIR */
#if !(defined(_WIN32) || defined(_WIN64))
#define MDBX_LOCKNAME "/mdbx.lck"
#endif
#else
#define MDBX_LOCKNAME_W L"\\mdbx.lck"
#define MDBX_LOCKNAME_A "\\mdbx.lck"
#ifdef UNICODE
#define MDBX_LOCKNAME MDBX_LOCKNAME_W
#else
#define MDBX_LOCKNAME MDBX_LOCKNAME_A
#endif /* UNICODE */
#endif /* Windows */
#endif /* MDBX_LOCKNAME */
#ifndef MDBX_DATANAME
/** \brief The name of the data file in the environment
* without using \ref MDBX_NOSUBDIR */
#if !(defined(_WIN32) || defined(_WIN64))
#define MDBX_DATANAME "/mdbx.dat"
#endif
#else
#define MDBX_DATANAME_W L"\\mdbx.dat"
#define MDBX_DATANAME_A "\\mdbx.dat"
#ifdef UNICODE
#define MDBX_DATANAME MDBX_DATANAME_W
#else
#define MDBX_DATANAME MDBX_DATANAME_A
#endif /* UNICODE */
#endif /* Windows */
#endif /* MDBX_DATANAME */
#ifndef MDBX_LOCK_SUFFIX
/** \brief The suffix of the lock file when \ref MDBX_NOSUBDIR is used */
#if !(defined(_WIN32) || defined(_WIN64))
#define MDBX_LOCK_SUFFIX "-lck"
#endif
#else
#define MDBX_LOCK_SUFFIX_W L"-lck"
#define MDBX_LOCK_SUFFIX_A "-lck"
#ifdef UNICODE
#define MDBX_LOCK_SUFFIX MDBX_LOCK_SUFFIX_W
#else
#define MDBX_LOCK_SUFFIX MDBX_LOCK_SUFFIX_A
#endif /* UNICODE */
#endif /* Windows */
#endif /* MDBX_LOCK_SUFFIX */
/* DEBUG & LOGGING ************************************************************/
@ -1028,12 +1058,15 @@ LIBMDBX_API const char *mdbx_dump_val(const MDBX_val *key, char *const buf,
const size_t bufsize);
/** \brief Panics with message and causes abnormal process termination. */
LIBMDBX_API void mdbx_panic(const char *fmt, ...) MDBX_PRINTF_ARGS(1, 2);
MDBX_NORETURN LIBMDBX_API void mdbx_panic(const char *fmt, ...)
MDBX_PRINTF_ARGS(1, 2);
/** \brief Panics with assertion failed message and causes abnormal process
* termination. */
LIBMDBX_API void mdbx_assert_fail(const MDBX_env *env, const char *msg,
const char *func, unsigned line);
MDBX_NORETURN LIBMDBX_API void mdbx_assert_fail(const MDBX_env *env,
const char *msg,
const char *func,
unsigned line);
/** end of c_debug @} */
/** \brief Environment flags
@ -1043,6 +1076,13 @@ LIBMDBX_API void mdbx_assert_fail(const MDBX_env *env, const char *msg,
enum MDBX_env_flags_t {
MDBX_ENV_DEFAULTS = 0,
/** Extra validation of DB structure and pages content.
*
* The `MDBX_VALIDATION` enables the simple safe/careful mode for working
* with damaged or untrusted DB. However, a notable performance
* degradation should be expected. */
MDBX_VALIDATION = UINT32_C(0x00002000),
/** No environment directory.
*
* By default, MDBX creates its environment in a directory whose pathname is
@ -1115,8 +1155,8 @@ enum MDBX_env_flags_t {
* while opening the database/environment which is already used by another
* process(es) with unknown mode/flags. In such cases, if there is a
* difference in the specified flags (\ref MDBX_NOMETASYNC,
* \ref MDBX_SAFE_NOSYNC, \ref MDBX_UTTERLY_NOSYNC, \ref MDBX_LIFORECLAIM,
* \ref MDBX_COALESCE and \ref MDBX_NORDAHEAD), instead of returning an error,
* \ref MDBX_SAFE_NOSYNC, \ref MDBX_UTTERLY_NOSYNC, \ref MDBX_LIFORECLAIM
* and \ref MDBX_NORDAHEAD), instead of returning an error,
* the database will be opened in a compatibility with the already used mode.
*
* `MDBX_ACCEDE` has no effect if the current process is the only one either
@ -1223,6 +1263,7 @@ enum MDBX_env_flags_t {
MDBX_NOMEMINIT = UINT32_C(0x1000000),
/** Aims to coalesce a Garbage Collection items.
* \note Always enabled since v0.12
*
* With `MDBX_COALESCE` flag MDBX will aim to coalesce items while recycling
* a Garbage Collection. Technically, when possible short lists of pages
@ -1593,8 +1634,7 @@ enum MDBX_put_flags_t {
MDBX_NOOVERWRITE = UINT32_C(0x10),
/** Has effect only for \ref MDBX_DUPSORT databases.
* For upsertion: don't write if the key-value pair already exist.
* For deletion: remove all values for key. */
* For upsertion: don't write if the key-value pair already exist. */
MDBX_NODUPDATA = UINT32_C(0x20),
/** For upsertion: overwrite the current key/data pair.
@ -1886,6 +1926,15 @@ enum MDBX_error_t {
/** Overlapping read and write transactions for the current thread */
MDBX_TXN_OVERLAPPING = -30415,
/** Internal error returned when the reserve of free pages is exhausted
* while updating the GC. Used as an auxiliary aid for debugging.
* \note From the user's point of view it is semantically
* equivalent to \ref MDBX_PROBLEM. */
MDBX_BACKLOG_DEPLETED = -30414,
/** Alternative/Duplicate LCK-file exists and should be removed manually */
MDBX_DUPLICATED_CLK = -30413,
/* The last of MDBX-added error codes */
MDBX_LAST_ADDED_ERRCODE = MDBX_TXN_OVERLAPPING,
@ -2011,7 +2060,9 @@ LIBMDBX_API const char *mdbx_strerror_r_ANSI2OEM(int errnum, char *buf,
* \returns a non-zero error value on failure and 0 on success. */
LIBMDBX_API int mdbx_env_create(MDBX_env **penv);
/** \brief MDBX environment options. */
/** \brief MDBX environment extra runtime options.
* \ingroup c_settings
* \see mdbx_env_set_option() \see mdbx_env_get_option() */
enum MDBX_option_t {
/** \brief Controls the maximum number of named databases for the environment.
*
@ -2180,13 +2231,46 @@ enum MDBX_option_t {
* to 50% (half empty) which corresponds to the range from 8192 and to 32768
* in units respectively. */
MDBX_opt_merge_threshold_16dot16_percent,
/** \brief Controls the choice between using write-through disk writes and
* usual ones followed by a flush via the `fdatasync()` syscall.
* \details Depending on the operating system, storage subsystem
* characteristics and the use case, higher performance can be achieved by
* either using write-through or a series of usual/lazy writes followed by
* the flush-to-disk.
*
* Basically, for N chunks the latency/cost of write-through is:
* latency = N * (emit + round-trip-to-storage + storage-execution);
* And for a series of lazy writes with a flush it is:
* latency = N * (emit + storage-execution) + flush + round-trip-to-storage.
*
* So, for large N and/or notable round-trip-to-storage the write+flush
* approach wins. But for small N and/or near-zero NVMe-like latency
* the write-through is better.
*
* To solve this issue libmdbx provides `MDBX_opt_writethrough_threshold`:
* - when N described above is less than or equal to the specified threshold,
* a write-through approach will be used;
* - otherwise, when N is greater than the specified threshold,
* a write-and-flush approach will be used.
*
* \note MDBX_opt_writethrough_threshold affects only \ref MDBX_SYNC_DURABLE
* mode without \ref MDBX_WRITEMAP, and not supported on Windows.
* On Windows a write-through is used always but \ref MDBX_NOMETASYNC could
* be used for switching to write-and-flush. */
MDBX_opt_writethrough_threshold,
/** \brief Controls prevention of page-faults of reclaimed and allocated pages
* in the \ref MDBX_WRITEMAP mode by clearing ones through file handle before
* touching. */
MDBX_opt_prefault_write_enable,
};
#ifndef __cplusplus
/** \ingroup c_settings */
typedef enum MDBX_option_t MDBX_option_t;
#endif
/** \brief Sets the value of a runtime options for an environment.
/** \brief Sets the value of an extra runtime option for an environment.
* \ingroup c_settings
*
* \param [in] env An environment handle returned by \ref mdbx_env_create().
@ -2199,7 +2283,7 @@ typedef enum MDBX_option_t MDBX_option_t;
LIBMDBX_API int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option,
uint64_t value);
/** \brief Gets the value of runtime options from an environment.
/** \brief Gets the value of extra runtime options from an environment.
* \ingroup c_settings
*
* \param [in] env An environment handle returned by \ref mdbx_env_create().
@ -2220,6 +2304,8 @@ LIBMDBX_API int mdbx_env_get_option(const MDBX_env *env,
* be called later to discard the \ref MDBX_env handle and release associated
* resources.
*
* \note On Windows the \ref mdbx_env_openW() is recommended to use.
*
* \param [in] env An environment handle returned
* by \ref mdbx_env_create()
*
@ -2287,6 +2373,14 @@ LIBMDBX_API int mdbx_env_get_option(const MDBX_env *env,
LIBMDBX_API int mdbx_env_open(MDBX_env *env, const char *pathname,
MDBX_env_flags_t flags, mdbx_mode_t mode);
#if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN)
/** \copydoc mdbx_env_open()
* \note Available only on Windows.
* \see mdbx_env_open() */
LIBMDBX_API int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname,
MDBX_env_flags_t flags, mdbx_mode_t mode);
#endif /* Windows */
/** \brief Deletion modes for \ref mdbx_env_delete().
* \ingroup c_extra
* \see mdbx_env_delete() */
@ -2313,6 +2407,8 @@ typedef enum MDBX_env_delete_mode_t MDBX_env_delete_mode_t;
/** \brief Delete the environment's files in a proper and multiprocess-safe way.
* \ingroup c_extra
*
* \note On Windows the \ref mdbx_env_deleteW() is recommended to use.
*
* \param [in] pathname The pathname for the database or the directory in which
* the database files reside.
*
@ -2330,6 +2426,14 @@ typedef enum MDBX_env_delete_mode_t MDBX_env_delete_mode_t;
LIBMDBX_API int mdbx_env_delete(const char *pathname,
MDBX_env_delete_mode_t mode);
#if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN)
/** \copydoc mdbx_env_delete()
* \note Available only on Windows.
* \see mdbx_env_delete() */
LIBMDBX_API int mdbx_env_deleteW(const wchar_t *pathname,
MDBX_env_delete_mode_t mode);
#endif /* Windows */
/** \brief Copy an MDBX environment to the specified path, with options.
* \ingroup c_extra
*
@ -2339,6 +2443,8 @@ LIBMDBX_API int mdbx_env_delete(const char *pathname,
* parallel with write transactions, because it employs a read-only
* transaction. See long-lived transactions under \ref restrictions section.
*
* \note On Windows the \ref mdbx_env_copyW() is recommended to use.
*
* \param [in] env An environment handle returned by mdbx_env_create().
* It must have already been opened successfully.
* \param [in] dest The pathname of a file in which the copy will reside.
@ -2364,6 +2470,14 @@ LIBMDBX_API int mdbx_env_delete(const char *pathname,
LIBMDBX_API int mdbx_env_copy(MDBX_env *env, const char *dest,
MDBX_copy_flags_t flags);
#if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN)
/** \copydoc mdbx_env_copy()
* \note Available only on Windows.
* \see mdbx_env_copy() */
LIBMDBX_API int mdbx_env_copyW(MDBX_env *env, const wchar_t *dest,
MDBX_copy_flags_t flags);
#endif /* Windows */
/** \brief Copy an environment to the specified file descriptor, with
* options.
* \ingroup c_extra
@ -2482,7 +2596,9 @@ struct MDBX_envinfo {
uint64_t mi_unsync_volume;
/** Current auto-sync threshold, see \ref mdbx_env_set_syncbytes(). */
uint64_t mi_autosync_threshold;
/** Time since the last steady sync in 1/65536 of second */
/** Time since entering to a "dirty" out-of-sync state in units of 1/65536 of
* second. In other words, this is the time since the last non-steady commit
* or zero if it was steady. */
uint32_t mi_since_sync_seconds16dot16;
/** Current auto-sync period in 1/65536 of second,
* see \ref mdbx_env_set_syncperiod(). */
@ -2510,6 +2626,12 @@ struct MDBX_envinfo {
uint64_t unspill; /**< Quantity of unspilled/reloaded pages */
uint64_t wops; /**< Number of explicit write operations (not a pages)
to a disk */
uint64_t prefault; /**< Number of prefault write operations (not a pages) */
uint64_t mincore; /**< Number of mincore() calls */
uint64_t
msync; /**< Number of explicit msync-to-disk operations (not a pages) */
uint64_t
fsync; /**< Number of explicit fsync-to-disk operations (not a pages) */
} mi_pgop_stat;
};
#ifndef __cplusplus
@ -2766,6 +2888,94 @@ LIBMDBX_INLINE_API(int, mdbx_env_close, (MDBX_env * env)) {
return mdbx_env_close_ex(env, false);
}
/** \brief Warming up options
* \ingroup c_settings
* \anchor warmup_flags
* \see mdbx_env_warmup() */
enum MDBX_warmup_flags_t {
/** By default \ref mdbx_env_warmup() just asks the OS kernel to
* asynchronously prefetch database pages. */
MDBX_warmup_default = 0,
/** Peek all pages of the allocated portion of the database
* to force them to be loaded into memory. However, the pages are just peeked
* sequentially, so unused pages that are in GC will be loaded in the same
* way as those that contain payload. */
MDBX_warmup_force = 1,
/** Use system calls to peek pages instead of accessing them directly,
* which at the cost of additional overhead avoids killing the current
* process by the OOM-killer in a lack-of-memory condition.
* \note Has effect only on POSIX (non-Windows) systems in conjunction
* with the \ref MDBX_warmup_force option. */
MDBX_warmup_oomsafe = 2,
/** Try to lock database pages in memory by `mlock()` on POSIX-systems
* or `VirtualLock()` on Windows. Please refer to the description of these
* functions for the reasonability of such locking and for information about
* its effects, including those on the system as a whole.
*
* Such locking in memory requires that the corresponding resource limits
* (e.g. `RLIMIT_RSS`, `RLIMIT_MEMLOCK` or process working set size)
* and the availability of system RAM are sufficiently high.
*
* On success, all currently allocated pages, both unused in GC and
* containing payload, will be locked in memory until the environment is
* closed, or explicitly unlocked by using \ref MDBX_warmup_release, or the
* database geometry is changed, including its auto-shrinking. */
MDBX_warmup_lock = 4,
/** Alters the corresponding current resource limits to be enough for locking
* pages by \ref MDBX_warmup_lock. However, this option should be used only in
* simpler applications since it takes into account only the current size of
* this environment, disregarding all other factors. For a real-world database
* application you will need full-fledged management of resources and their
* limits with respective engineering. */
MDBX_warmup_touchlimit = 8,
/** Release the lock that was performed before by \ref MDBX_warmup_lock. */
MDBX_warmup_release = 16,
};
#ifndef __cplusplus
typedef enum MDBX_warmup_flags_t MDBX_warmup_flags_t;
#else
DEFINE_ENUM_FLAG_OPERATORS(MDBX_warmup_flags_t)
#endif
/** \brief Warms up the database by loading pages into memory, optionally
* locking them. \ingroup c_settings
*
* Depending on the specified flags, notifies the OS kernel about upcoming
* access, force-loads the database pages, locks them in memory or releases
* such a lock. However, the function does not analyze the b-tree nor the GC.
* Therefore unused pages that are in GC are handled (i.e. will be loaded) in
* the same way as those that contain payload.
*
* At least one of `env` or `txn` argument must be non-null.
*
* \param [in] env An environment handle returned
* by \ref mdbx_env_create().
* \param [in] txn A transaction handle returned
* by \ref mdbx_txn_begin().
* \param [in] flags The \ref warmup_flags, bitwise OR'ed together.
*
* \param [in] timeout_seconds_16dot16 Optional timeout which is checked only
* while explicitly peeking database pages
* to load them, i.e. if the \ref MDBX_warmup_force
* option was specified.
*
* \returns A non-zero error value on failure and 0 on success.
* Some possible errors are:
*
* \retval MDBX_ENOSYS The system does not support requested
* operation(s).
*
* \retval MDBX_RESULT_TRUE The specified timeout is reached during load
* data into memory. */
LIBMDBX_API int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn,
MDBX_warmup_flags_t flags,
unsigned timeout_seconds_16dot16);
/** \brief Set environment flags.
* \ingroup c_settings
*
@ -2804,6 +3014,8 @@ LIBMDBX_API int mdbx_env_get_flags(const MDBX_env *env, unsigned *flags);
/** \brief Return the path that was used in mdbx_env_open().
* \ingroup c_statinfo
*
* \note On Windows the \ref mdbx_env_get_pathW() is recommended to use.
*
* \param [in] env An environment handle returned by \ref mdbx_env_create()
* \param [out] dest Address of a string pointer to contain the path.
* This is the actual string in the environment, not a
@ -2814,6 +3026,13 @@ LIBMDBX_API int mdbx_env_get_flags(const MDBX_env *env, unsigned *flags);
* \retval MDBX_EINVAL An invalid parameter was specified. */
LIBMDBX_API int mdbx_env_get_path(const MDBX_env *env, const char **dest);
#if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN)
/** \copydoc mdbx_env_get_path()
* \note Available only on Windows.
* \see mdbx_env_get_path() */
LIBMDBX_API int mdbx_env_get_pathW(const MDBX_env *env, const wchar_t **dest);
#endif /* Windows */
/** \brief Return the file descriptor for the given environment.
* \ingroup c_statinfo
*
@ -3093,6 +3312,21 @@ mdbx_limits_keysize_max(intptr_t pagesize, MDBX_db_flags_t flags);
MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t
mdbx_limits_valsize_max(intptr_t pagesize, MDBX_db_flags_t flags);
/** \brief Returns maximal size of key-value pair to fit in a single page with
* the given size and database flags, or -1 if pagesize is invalid.
* \ingroup c_statinfo
* \see db_flags */
MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t
mdbx_limits_pairsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags);
/** \brief Returns maximal data size in bytes to fit in a leaf-page or
* single overflow/large-page with the given page size and database flags,
* or -1 if pagesize is invalid.
* \ingroup c_statinfo
* \see db_flags */
MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t
mdbx_limits_valsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags);
/** \brief Returns maximal write transaction size (i.e. limit for summary volume
* of dirty pages) in bytes for given page size, or -1 if pagesize is invalid.
* \ingroup c_statinfo */
@ -3248,6 +3482,32 @@ mdbx_env_get_maxvalsize_ex(const MDBX_env *env, MDBX_db_flags_t flags);
MDBX_DEPRECATED MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int
mdbx_env_get_maxkeysize(const MDBX_env *env);
/** \brief Returns maximal size of key-value pair to fit in a single page
* for specified database flags.
* \ingroup c_statinfo
*
* \param [in] env An environment handle returned by \ref mdbx_env_create().
* \param [in] flags Database options (\ref MDBX_DUPSORT, \ref MDBX_INTEGERKEY
* and so on). \see db_flags
*
* \returns The maximum size of a key-value pair that fits in a single page,
* or -1 if something is wrong. */
MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int
mdbx_env_get_pairsize4page_max(const MDBX_env *env, MDBX_db_flags_t flags);
/** \brief Returns maximal data size in bytes to fit in a leaf-page or
* single overflow/large-page for specified database flags.
* \ingroup c_statinfo
*
* \param [in] env An environment handle returned by \ref mdbx_env_create().
* \param [in] flags Database options (\ref MDBX_DUPSORT, \ref MDBX_INTEGERKEY
* and so on). \see db_flags
*
* \returns The maximum size of data that fits in a leaf-page or a single
* overflow/large-page, or -1 if something is wrong. */
MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int
mdbx_env_get_valsize4page_max(const MDBX_env *env, MDBX_db_flags_t flags);
/** \brief Sets application information (a context pointer) associated with
* the environment.
* \see mdbx_env_get_userctx()
@ -3526,8 +3786,8 @@ struct MDBX_commit_latency {
/** \brief Duration of preparation (commit child transactions, update
* sub-databases records and cursors destroying). */
uint32_t preparation;
/** \brief Duration of GC/freeDB handling & updation. */
uint32_t gc;
/** \brief Duration of GC update by wall clock. */
uint32_t gc_wallclock;
/** \brief Duration of internal audit if enabled. */
uint32_t audit;
/** \brief Duration of writing dirty/modified data pages to a filesystem,
@ -3540,6 +3800,74 @@ struct MDBX_commit_latency {
uint32_t ending;
/** \brief The total duration of a commit. */
uint32_t whole;
/** \brief User-mode CPU time spent on GC update. */
uint32_t gc_cputime;
/** \brief Information for profiling of GC operation.
* \note The statistics are shared by all processes working with the same
* database file and are stored in the LCK-file. The data is accumulated
* when any transaction is committed, but only in libmdbx builds with the
* \ref MDBX_ENABLE_PROFGC option enabled. The collected statistics are
* returned to any process that uses \ref mdbx_txn_commit_ex() and are
* simultaneously reset when top-level (non-nested) transactions complete. */
struct {
/** \brief Number of GC update iterations,
* greater than 1 if there were retries/restarts. */
uint32_t wloops;
/** \brief Number of iterations of merging GC records. */
uint32_t coalescences;
/** \brief Number of times the previous reliable/steady commit points
* were wiped out while operating in \ref MDBX_UTTERLY_NOSYNC mode. */
uint32_t wipes;
/** \brief Number of forced flushes to disk
* made to avoid database growth while operating outside of
* \ref MDBX_UTTERLY_NOSYNC mode. */
uint32_t flushes;
/** \brief Number of invocations of the Handle-Slow-Readers mechanism
* made to avoid database growth.
* \see MDBX_hsr_func */
uint32_t kicks;
/** \brief Slow path execution count
* of GC for the sake of user data. */
uint32_t work_counter;
/** \brief Wall-clock time spent on reading and searching inside
* GC for the sake of user data. */
uint32_t work_rtime_monotonic;
/** \brief User-mode CPU time spent
* on preparing pages retrieved from GC for user data,
* including paging in from disk. */
uint32_t work_xtime_cpu;
/** \brief Number of search iterations inside GC when allocating pages
* for the sake of user data. */
uint32_t work_rsteps;
/** \brief Number of requests for allocation of page sequences
* for the sake of user data. */
uint32_t work_xpages;
/** \brief Number of page faults inside GC
* when allocating and preparing pages for user data. */
uint32_t work_majflt;
/** \brief Slow path execution count
* of GC for the purposes of maintaining and updating GC itself. */
uint32_t self_counter;
/** \brief Wall-clock time spent on reading and searching inside
* GC for the purposes of maintaining and updating GC itself. */
uint32_t self_rtime_monotonic;
/** \brief User-mode CPU time spent on preparing
* pages retrieved from GC for the purposes of maintaining and updating
* GC itself, including paging in from disk. */
uint32_t self_xtime_cpu;
/** \brief Number of search iterations inside GC when allocating pages
* for the purposes of maintaining and updating GC itself. */
uint32_t self_rsteps;
/** \brief Number of requests for allocation of page sequences
* for GC itself. */
uint32_t self_xpages;
/** \brief Number of page faults inside GC
* when allocating and preparing pages for GC itself. */
uint32_t self_majflt;
} gc_prof;
};
#ifndef __cplusplus
/** \ingroup c_statinfo */
@ -3862,6 +4190,8 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a,
* by current thread. */
LIBMDBX_API int mdbx_dbi_open(MDBX_txn *txn, const char *name,
MDBX_db_flags_t flags, MDBX_dbi *dbi);
LIBMDBX_API int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name,
MDBX_db_flags_t flags, MDBX_dbi *dbi);
/** \deprecated Please
* \ref avoid_custom_comparators "avoid using custom comparators" and use
@ -3881,6 +4211,9 @@ LIBMDBX_API int mdbx_dbi_open(MDBX_txn *txn, const char *name,
MDBX_DEPRECATED LIBMDBX_API int
mdbx_dbi_open_ex(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags,
MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp);
MDBX_DEPRECATED LIBMDBX_API int
mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags,
MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp);
/** \defgroup value2key Value-to-Key functions
* \brief Value-to-Key functions to
@ -5083,11 +5416,12 @@ LIBMDBX_API int mdbx_thread_unregister(const MDBX_env *env);
* this value into account to evaluate the impact that
* a long-running transaction has.
* \param [in] retry A retry number starting from 0.
* If callback has returned 0 at least once, then at end
* of current handling loop the callback function will be
* called additionally with negative value to notify about
* the end of loop. The callback function can use this value
* to implement timeout logic while waiting for readers.
* If callback has returned 0 at least once, then at end of
* current handling loop the callback function will be
* called additionally with negative `retry` value to notify
* about the end of loop. The callback function can use this
* fact to implement timeout reset logic while waiting for
* a readers.
*
* \returns The RETURN CODE determines the further actions libmdbx and must
* match the action which was executed by the callback:
@ -5110,7 +5444,7 @@ LIBMDBX_API int mdbx_thread_unregister(const MDBX_env *env);
* \retval 1 Transaction aborted asynchronous and reader slot
* should be cleared immediately, i.e. read transaction
* will not continue but \ref mdbx_txn_abort()
* or \ref mdbx_txn_reset() will be called later.
* nor \ref mdbx_txn_reset() will be called later.
*
* \retval 2 or great The reader process was terminated or killed,
* and libmdbx should entirely reset reader registration.
@ -5177,18 +5511,20 @@ typedef enum MDBX_page_type_t MDBX_page_type_t;
#endif
/** \brief Pseudo-name for MainDB */
#define MDBX_PGWALK_MAIN ((const char *)((ptrdiff_t)0))
#define MDBX_PGWALK_MAIN ((void *)((ptrdiff_t)0))
/** \brief Pseudo-name for GarbageCollectorDB */
#define MDBX_PGWALK_GC ((const char *)((ptrdiff_t)-1))
#define MDBX_PGWALK_GC ((void *)((ptrdiff_t)-1))
/** \brief Pseudo-name for MetaPages */
#define MDBX_PGWALK_META ((const char *)((ptrdiff_t)-2))
#define MDBX_PGWALK_META ((void *)((ptrdiff_t)-2))
/** \brief Callback function for traverse the b-tree. \see mdbx_env_pgwalk() */
typedef int MDBX_pgvisitor_func(
const uint64_t pgno, const unsigned number, void *const ctx, const int deep,
const char *const dbi, const size_t page_size, const MDBX_page_type_t type,
const MDBX_error_t err, const size_t nentries, const size_t payload_bytes,
const size_t header_bytes, const size_t unused_bytes) MDBX_CXX17_NOEXCEPT;
typedef int
MDBX_pgvisitor_func(const uint64_t pgno, const unsigned number, void *const ctx,
const int deep, const MDBX_val *dbi_name,
const size_t page_size, const MDBX_page_type_t type,
const MDBX_error_t err, const size_t nentries,
const size_t payload_bytes, const size_t header_bytes,
const size_t unused_bytes) MDBX_CXX17_NOEXCEPT;
/** \brief B-tree traversal function. */
LIBMDBX_API int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor,
@ -5199,11 +5535,24 @@ LIBMDBX_API int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor,
*
* This function mostly of internal API for `mdbx_chk` utility and subject to
* change at any time. Do not use this function to avoid shooting your own
* leg(s). */
* leg(s).
*
* \note On Windows the \ref mdbx_env_open_for_recoveryW() is recommended
* to use. */
LIBMDBX_API int mdbx_env_open_for_recovery(MDBX_env *env, const char *pathname,
unsigned target_meta,
bool writeable);
#if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN)
/** \copydoc mdbx_env_open_for_recovery()
* \note Available only on Windows.
* \see mdbx_env_open_for_recovery() */
LIBMDBX_API int mdbx_env_open_for_recoveryW(MDBX_env *env,
const wchar_t *pathname,
unsigned target_meta,
bool writeable);
#endif /* Windows */
/** \brief Turn database to the specified meta-page.
*
* This function mostly of internal API for `mdbx_chk` utility and subject to
@ -5213,230 +5562,8 @@ LIBMDBX_API int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target_meta);
/** end of btree_traversal @} */
/**** Attribute support functions for Nexenta (scheduled for removal)
* *****************************************************************/
#if defined(MDBX_NEXENTA_ATTRS) || defined(DOXYGEN)
/** \defgroup nexenta Attribute support functions for Nexenta
* \ingroup c_crud
* @{ */
typedef uint_fast64_t mdbx_attr_t;
/** Store by cursor with attribute.
*
* This function stores key/data pairs into the database. The cursor is
* positioned at the new item, or on failure usually near it.
*
* \note Internally based on \ref MDBX_RESERVE feature,
* therefore doesn't support \ref MDBX_DUPSORT.
*
* \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open()
* \param [in] key The key operated on.
* \param [in] data The data operated on.
* \param [in] attr The attribute.
* \param [in] flags Options for this operation. This parameter must be set
* to 0 or one of the values described here:
* - \ref MDBX_CURRENT
* Replace the item at the current cursor position. The key parameter
* must still be provided, and must match it, otherwise the function
* return \ref MDBX_EKEYMISMATCH.
*
* - \ref MDBX_APPEND
* Append the given key/data pair to the end of the database. No key
* comparisons are performed. This option allows fast bulk loading when
* keys are already known to be in the correct order. Loading unsorted
* keys with this flag will cause a \ref MDBX_KEYEXIST error.
*
* \see \ref c_crud_hints "Quick reference for Insert/Update/Delete operations"
*
* \returns A non-zero error value on failure and 0 on success,
* some possible errors are:
* \retval MDBX_EKEYMISMATCH
* \retval MDBX_MAP_FULL The database is full, see \ref mdbx_env_set_mapsize().
* \retval MDBX_TXN_FULL The transaction has too many dirty pages.
* \retval MDBX_EACCES An attempt was made to write in a read-only
* transaction.
* \retval MDBX_EINVAL an invalid parameter was specified. */
LIBMDBX_API int mdbx_cursor_put_attr(MDBX_cursor *cursor, MDBX_val *key,
MDBX_val *data, mdbx_attr_t attr,
MDBX_put_flags_t flags);
/** Store items and attributes into a database.
*
* This function stores key/data pairs in the database. The default behavior
* is to enter the new key/data pair, replacing any previously existing key
* if duplicates are disallowed.
*
* \note Internally based on \ref MDBX_RESERVE feature,
* therefore doesn't support \ref MDBX_DUPSORT.
*
* \param [in] txn A transaction handle returned by \ref mdbx_txn_begin().
* \param [in] dbi A database handle returned by \ref mdbx_dbi_open().
* \param [in] key The key to store in the database.
* \param [in] attr The attribute to store in the database.
* \param [in,out] data The data to store.
* \param [in] flags Special options for this operation. This parameter
* must be set to 0 or by bitwise OR'ing together one or
* more of the values described here:
* - \ref MDBX_NOOVERWRITE
* Enter the new key/data pair only if the key does not already appear
* in the database. The function will return \ref MDBX_KEYEXIST if the key
* already appears in the database. The data parameter will be set to
* point to the existing item.
*
* - \ref MDBX_CURRENT
* Update an single existing entry, but not add new ones. The function
* will return \ref MDBX_NOTFOUND if the given key not exist in the
* database. Or the \ref MDBX_EMULTIVAL in case duplicates for the given
* key.
*
* - \ref MDBX_APPEND
* Append the given key/data pair to the end of the database. This option
* allows fast bulk loading when keys are already known to be in the
* correct order. Loading unsorted keys with this flag will cause
* a \ref MDBX_EKEYMISMATCH error.
*
* \see \ref c_crud_hints "Quick reference for Insert/Update/Delete operations"
*
* \returns A non-zero error value on failure and 0 on success,
* some possible errors are:
* \retval MDBX_KEYEXIST
* \retval MDBX_MAP_FULL The database is full, see \ref mdbx_env_set_mapsize().
* \retval MDBX_TXN_FULL The transaction has too many dirty pages.
* \retval MDBX_EACCES An attempt was made to write
* in a read-only transaction.
* \retval MDBX_EINVAL An invalid parameter was specified. */
LIBMDBX_API int mdbx_put_attr(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key,
MDBX_val *data, mdbx_attr_t attr,
MDBX_put_flags_t flags);
/** Set items attribute from a database.
*
* This function stores key/data pairs attribute to the database.
*
* \note Internally based on \ref MDBX_RESERVE feature,
* therefore doesn't support \ref MDBX_DUPSORT.
*
* \param [in] txn A transaction handle returned by \ref mdbx_txn_begin().
* \param [in] dbi A database handle returned by \ref mdbx_dbi_open().
* \param [in] key The key to search for in the database.
* \param [in] data The data to be stored or NULL to save previous value.
* \param [in] attr The attribute to be stored.
*
* \returns A non-zero error value on failure and 0 on success,
* some possible errors are:
* \retval MDBX_NOTFOUND The key-value pair was not in the database.
* \retval MDBX_EINVAL An invalid parameter was specified. */
LIBMDBX_API int mdbx_set_attr(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key,
MDBX_val *data, mdbx_attr_t attr);
/** Get items attribute from a database cursor.
*
* This function retrieves key/data pairs from the database. The address and
* length of the key are returned in the object to which key refers (except
* for the case of the \ref MDBX_SET option, in which the key object is
* unchanged), and the address and length of the data are returned in the object
* to which data refers.
* \see mdbx_get()
*
* \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open().
* \param [in,out] key The key for a retrieved item.
* \param [in,out] data The data of a retrieved item.
* \param [out] pattr The pointer to retrieve attribute.
* \param [in] op A cursor operation MDBX_cursor_op.
*
* \returns A non-zero error value on failure and 0 on success,
* some possible errors are:
* \retval MDBX_NOTFOUND No matching key found.
* \retval MDBX_EINVAL An invalid parameter was specified. */
LIBMDBX_API int mdbx_cursor_get_attr(MDBX_cursor *cursor, MDBX_val *key,
MDBX_val *data, mdbx_attr_t *pattr,
MDBX_cursor_op op);
/** Get items attribute from a database.
*
* This function retrieves key/data pairs from the database. The address
* and length of the data associated with the specified key are returned
* in the structure to which data refers.
* If the database supports duplicate keys (see \ref MDBX_DUPSORT) then the
* first data item for the key will be returned. Retrieval of other
* items requires the use of \ref mdbx_cursor_get().
*
* \note The memory pointed to by the returned values is owned by the
* database. The caller need not dispose of the memory, and may not
* modify it in any way. For values returned in a read-only transaction
* any modification attempts will cause a `SIGSEGV`.
*
* \note Values returned from the database are valid only until a
* subsequent update operation, or the end of the transaction.
*
* \param [in] txn A transaction handle returned by \ref mdbx_txn_begin().
* \param [in] dbi A database handle returned by \ref mdbx_dbi_open().
* \param [in] key The key to search for in the database.
* \param [in,out] data The data corresponding to the key.
* \param [out] pattr The pointer to retrieve attribute.
*
* \returns A non-zero error value on failure and 0 on success,
* some possible errors are:
* \retval MDBX_NOTFOUND The key was not in the database.
* \retval MDBX_EINVAL An invalid parameter was specified. */
LIBMDBX_API int mdbx_get_attr(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key,
MDBX_val *data, mdbx_attr_t *pattr);
/** end of nexenta @} */
#endif /* MDBX_NEXENTA_ATTRS */
/** end of c_api @} */
/*******************************************************************************
* Workaround for mmaped-lookahead-cross-page-boundary bug
* in an obsolete versions of Elbrus's libc and kernels. */
#if defined(__e2k__) && defined(MDBX_E2K_MLHCPB_WORKAROUND) && \
MDBX_E2K_MLHCPB_WORKAROUND
LIBMDBX_API int mdbx_e2k_memcmp_bug_workaround(const void *s1, const void *s2,
size_t n);
LIBMDBX_API int mdbx_e2k_strcmp_bug_workaround(const char *s1, const char *s2);
LIBMDBX_API int mdbx_e2k_strncmp_bug_workaround(const char *s1, const char *s2,
size_t n);
LIBMDBX_API size_t mdbx_e2k_strlen_bug_workaround(const char *s);
LIBMDBX_API size_t mdbx_e2k_strnlen_bug_workaround(const char *s,
size_t maxlen);
#ifdef __cplusplus
/* Forwarding wrappers placed into namespace std: each one simply calls the
 * global-namespace mdbx_e2k_*_bug_workaround() function of the same name.
 * NOTE(review): presumably these exist so that qualified calls such as
 * std::strlen() still compile once the macros defined further below rename
 * memcmp/strcmp/strncmp/strlen/strnlen to the workaround names — confirm. */
namespace std {
/* Forwards to ::mdbx_e2k_memcmp_bug_workaround(). */
inline int mdbx_e2k_memcmp_bug_workaround(const void *s1, const void *s2,
size_t n) {
return ::mdbx_e2k_memcmp_bug_workaround(s1, s2, n);
}
/* Forwards to ::mdbx_e2k_strcmp_bug_workaround(). */
inline int mdbx_e2k_strcmp_bug_workaround(const char *s1, const char *s2) {
return ::mdbx_e2k_strcmp_bug_workaround(s1, s2);
}
/* Forwards to ::mdbx_e2k_strncmp_bug_workaround(). */
inline int mdbx_e2k_strncmp_bug_workaround(const char *s1, const char *s2,
size_t n) {
return ::mdbx_e2k_strncmp_bug_workaround(s1, s2, n);
}
/* Forwards to ::mdbx_e2k_strlen_bug_workaround(). */
inline size_t mdbx_e2k_strlen_bug_workaround(const char *s) {
return ::mdbx_e2k_strlen_bug_workaround(s);
}
/* Forwards to ::mdbx_e2k_strnlen_bug_workaround(). */
inline size_t mdbx_e2k_strnlen_bug_workaround(const char *s, size_t maxlen) {
return ::mdbx_e2k_strnlen_bug_workaround(s, maxlen);
}
} // namespace std
#endif /* __cplusplus */
#include <string.h>
#include <strings.h>
#undef memcmp
#define memcmp mdbx_e2k_memcmp_bug_workaround
#undef bcmp
#define bcmp mdbx_e2k_memcmp_bug_workaround
#undef strcmp
#define strcmp mdbx_e2k_strcmp_bug_workaround
#undef strncmp
#define strncmp mdbx_e2k_strncmp_bug_workaround
#undef strlen
#define strlen mdbx_e2k_strlen_bug_workaround
#undef strnlen
#define strnlen mdbx_e2k_strnlen_bug_workaround
#endif /* MDBX_E2K_MLHCPB_WORKAROUND */
#ifdef __cplusplus
} /* extern "C" */
#endif

311
mdbx.h++
View File

@ -1,7 +1,7 @@
/// \file mdbx.h++
/// \brief The libmdbx C++ API header file.
///
/// \author Copyright (c) 2020-2022, Leonid Yuriev <leo@yuriev.ru>.
/// \author Copyright (c) 2020-2023, Leonid Yuriev <leo@yuriev.ru>.
/// \copyright SPDX-License-Identifier: Apache-2.0
///
/// Tested with:
@ -84,6 +84,11 @@
#include <experimental/filesystem>
#endif
#if __cplusplus >= 201103L
#include <chrono>
#include <ratio>
#endif
#include "mdbx.h"
#if (defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L) || \
@ -386,6 +391,11 @@ using path = ::std::wstring;
using path = ::std::string;
#endif /* mdbx::path */
#if __cplusplus >= 201103L || defined(DOXYGEN)
/// \brief Duration in 1/65536 units of second.
using duration = ::std::chrono::duration<unsigned, ::std::ratio<1, 65536>>;
#endif /* Duration for C++11 */
/// \defgroup cxx_exceptions exceptions and errors
/// @{
@ -3189,6 +3199,7 @@ public:
/// \brief Returns the minimal values size in bytes for specified values
/// mode.
static inline size_t value_min(value_mode) noexcept;
/// \brief Returns the maximal value size in bytes for specified page size
/// and database flags.
static inline size_t value_max(intptr_t pagesize, MDBX_db_flags_t flags);
@ -3201,6 +3212,35 @@ public:
/// \brief Returns the maximal value size in bytes for given environment
/// and values mode.
static inline size_t value_max(const env &, value_mode);
/// \brief Returns maximal size of key-value pair to fit in a single page
/// for specified size and database flags.
static inline size_t pairsize4page_max(intptr_t pagesize,
MDBX_db_flags_t flags);
/// \brief Returns maximal size of key-value pair to fit in a single page
/// for specified page size and values mode.
static inline size_t pairsize4page_max(intptr_t pagesize, value_mode);
/// \brief Returns maximal size of key-value pair to fit in a single page
/// for given environment and database flags.
static inline size_t pairsize4page_max(const env &, MDBX_db_flags_t flags);
/// \brief Returns maximal size of key-value pair to fit in a single page
/// for given environment and values mode.
static inline size_t pairsize4page_max(const env &, value_mode);
/// \brief Returns maximal data size in bytes to fit in a leaf-page or
/// single overflow/large-page for specified size and database flags.
static inline size_t valsize4page_max(intptr_t pagesize,
MDBX_db_flags_t flags);
/// \brief Returns maximal data size in bytes to fit in a leaf-page or
/// single overflow/large-page for specified page size and values mode.
static inline size_t valsize4page_max(intptr_t pagesize, value_mode);
/// \brief Returns maximal data size in bytes to fit in a leaf-page or
/// single overflow/large-page for given environment and database flags.
static inline size_t valsize4page_max(const env &, MDBX_db_flags_t flags);
/// \brief Returns maximal data size in bytes to fit in a leaf-page or
/// single overflow/large-page for given environment and values mode.
static inline size_t valsize4page_max(const env &, value_mode);
/// \brief Returns the maximal write transaction size (i.e. limit for
/// summary volume of dirty pages) in bytes for specified page size.
static inline size_t transaction_size_max(intptr_t pagesize);
@ -3237,6 +3277,8 @@ public:
#if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN)
env &copy(const ::std::wstring &destination, bool compactify,
bool force_dynamic_size = false);
env &copy(const wchar_t *destination, bool compactify,
bool force_dynamic_size = false);
#endif /* Windows */
env &copy(const ::std::string &destination, bool compactify,
bool force_dynamic_size = false);
@ -3272,6 +3314,8 @@ public:
#if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN)
static bool remove(const ::std::wstring &pathname,
const remove_mode mode = just_remove);
static bool remove(const wchar_t *pathname,
const remove_mode mode = just_remove);
#endif /* Windows */
static bool remove(const ::std::string &pathname,
const remove_mode mode = just_remove);
@ -3312,9 +3356,11 @@ public:
/// \brief Returns the maximum number of threads/reader slots for the
/// environment.
/// \see extra_runtime_option::max_readers
inline unsigned max_readers() const;
/// \brief Returns the maximum number of named databases for the environment.
/// \see extra_runtime_option::max_maps
inline unsigned max_maps() const;
/// \brief Returns the application context associated with the environment.
@ -3326,59 +3372,117 @@ public:
/// \brief Sets threshold to force flush the data buffers to disk, for
/// non-sync durability modes.
///
/// The threshold value affects all processes which operates with given
/// environment until the last process close environment or a new value will
/// be settled.
/// Data is always written to disk when \ref txn_managed::commit() is called,
/// but the operating system may keep it buffered. MDBX always flushes the OS
/// buffers upon commit as well, unless the environment was opened with \ref
/// whole_fragile, \ref lazy_weak_tail or in part \ref
/// half_synchronous_weak_last. The default is 0, than mean no any threshold
/// checked, and no additional flush will be made.
/// \details The threshold value affects all processes which operates with
/// given environment until the last process close environment or a new value
/// will be settled. Data is always written to disk when \ref
/// txn_managed::commit() is called, but the operating system may keep it
/// buffered. MDBX always flushes the OS buffers upon commit as well, unless
/// the environment was opened with \ref whole_fragile, \ref lazy_weak_tail or
/// in part \ref half_synchronous_weak_last.
///
/// The default is 0, which means that no threshold is checked and no
/// additional flush will be made.
/// \see extra_runtime_option::sync_bytes
inline env &set_sync_threshold(size_t bytes);
/// \brief Gets threshold used to force flush the data buffers to disk, for
/// non-sync durability modes.
///
/// \copydetails set_sync_threshold()
/// \see extra_runtime_option::sync_bytes
inline size_t sync_threshold() const;
#if __cplusplus >= 201103L || defined(DOXYGEN)
/// \brief Sets relative period since the last unsteady commit to force flush
/// the data buffers to disk, for non-sync durability modes.
///
/// The relative period value affects all processes which operates with given
/// environment until the last process close environment or a new value will
/// be settled.
/// Data is always written to disk when \ref txn_managed::commit() is called,
/// but the operating system may keep it buffered. MDBX always flushes the OS
/// buffers upon commit as well, unless the environment was opened with \ref
/// whole_fragile, \ref lazy_weak_tail or in part \ref
/// half_synchronous_weak_last. Settled period don't checked asynchronously,
/// but only by the \ref txn_managed::commit() and \ref env::sync_to_disk()
/// functions. Therefore, in cases where transactions are committed
/// infrequently and/or irregularly, polling by \ref env::poll_sync_to_disk()
/// may be a reasonable solution to timeout enforcement. The default is 0,
/// than mean no any timeout checked, and no additional flush will be made.
/// \details The relative period value affects all processes which operates
/// with given environment until the last process close environment or a new
/// value will be settled. Data is always written to disk when \ref
/// txn_managed::commit() is called, but the operating system may keep it
/// buffered. MDBX always flushes the OS buffers upon commit as well, unless
/// the environment was opened with \ref whole_fragile, \ref lazy_weak_tail or
/// in part \ref half_synchronous_weak_last. Settled period don't checked
/// asynchronously, but only by the \ref txn_managed::commit() and \ref
/// env::sync_to_disk() functions. Therefore, in cases where transactions are
/// committed infrequently and/or irregularly, polling by \ref
/// env::poll_sync_to_disk() may be a reasonable solution to timeout
/// enforcement.
///
/// The default is 0, which means that no timeout is checked and no
/// additional flush will be made.
/// \see extra_runtime_option::sync_period
inline env &set_sync_period(const duration &period);
/// \brief Gets relative period since the last unsteady commit that used to
/// force flush the data buffers to disk, for non-sync durability modes.
/// \copydetails set_sync_period(const duration&)
/// \see set_sync_period(const duration&)
/// \see extra_runtime_option::sync_period
inline duration sync_period() const;
#endif
/// \copydoc set_sync_period(const duration&)
/// \param [in] seconds_16dot16 The period in 1/65536 of second when a
/// synchronous flush would be made since the last unsteady commit.
inline env &set_sync_period(unsigned seconds_16dot16);
inline env &set_sync_period__seconds_16dot16(unsigned seconds_16dot16);
/// \brief Sets relative period since the last unsteady commit to force flush
/// the data buffers to disk, for non-sync durability modes.
///
/// The relative period value affects all processes which operate with the
/// given environment until the last process closes the environment or a new
/// value is set.
/// Data is always written to disk when \ref txn_managed::commit() is called,
/// but the operating system may keep it buffered. MDBX always flushes the OS
/// buffers upon commit as well, unless the environment was opened with \ref
/// whole_fragile, \ref lazy_weak_tail or in part \ref
/// half_synchronous_weak_last. The configured period is not checked
/// asynchronously, but only by the \ref txn_managed::commit() and \ref
/// env::sync_to_disk() functions. Therefore, in cases where transactions are
/// committed infrequently and/or irregularly, polling by \ref
/// env::poll_sync_to_disk() may be a reasonable solution to timeout
/// enforcement. The default is 0, which means no timeout is checked and no
/// additional flush will be made.
///
/// \copydoc sync_period()
/// \see set_sync_period__seconds_16dot16(unsigned)
inline unsigned sync_period__seconds_16dot16() const;
/// \copydoc set_sync_period(const duration&)
/// \param [in] seconds The period in second when a synchronous flush would
/// be made since the last unsteady commit.
inline env &set_sync_period(double seconds);
inline env &set_sync_period__seconds_double(double seconds);
/// \copydoc sync_period()
/// \see set_sync_period__seconds_double(double)
inline double sync_period__seconds_double() const;
// Scoped C++ counterpart of the C-level option identifiers: every enumerator
// below is initialised from the MDBX_opt_* constant of the same meaning, and
// its documentation is pulled in from that constant via \copydoc.
/// \copydoc MDBX_option_t
enum class extra_runtime_option {
/// \copydoc MDBX_opt_max_db
/// \see max_maps() \see env::operate_parameters::max_maps
max_maps = MDBX_opt_max_db,
/// \copydoc MDBX_opt_max_readers
/// \see max_readers() \see env::operate_parameters::max_readers
max_readers = MDBX_opt_max_readers,
/// \copydoc MDBX_opt_sync_bytes
/// \see sync_threshold() \see set_sync_threshold()
sync_bytes = MDBX_opt_sync_bytes,
/// \copydoc MDBX_opt_sync_period
/// \see sync_period() \see set_sync_period()
sync_period = MDBX_opt_sync_period,
/// \copydoc MDBX_opt_rp_augment_limit
rp_augment_limit = MDBX_opt_rp_augment_limit,
/// \copydoc MDBX_opt_loose_limit
loose_limit = MDBX_opt_loose_limit,
/// \copydoc MDBX_opt_dp_reserve_limit
dp_reserve_limit = MDBX_opt_dp_reserve_limit,
/// \copydoc MDBX_opt_txn_dp_limit
dp_limit = MDBX_opt_txn_dp_limit,
/// \copydoc MDBX_opt_txn_dp_initial
dp_initial = MDBX_opt_txn_dp_initial,
/// \copydoc MDBX_opt_spill_max_denominator
spill_max_denominator = MDBX_opt_spill_max_denominator,
/// \copydoc MDBX_opt_spill_min_denominator
spill_min_denominator = MDBX_opt_spill_min_denominator,
/// \copydoc MDBX_opt_spill_parent4child_denominator
spill_parent4child_denominator = MDBX_opt_spill_parent4child_denominator,
/// \copydoc MDBX_opt_merge_threshold_16dot16_percent
merge_threshold_16dot16_percent = MDBX_opt_merge_threshold_16dot16_percent,
/// \copydoc MDBX_opt_writethrough_threshold
writethrough_threshold = MDBX_opt_writethrough_threshold,
/// \copydoc MDBX_opt_prefault_write_enable
prefault_write_enable = MDBX_opt_prefault_write_enable,
};
/// \copybrief mdbx_env_set_option()
inline env &set_extra_option(extra_runtime_option option, uint64_t value);
/// \copybrief mdbx_env_get_option()
inline uint64_t extra_option(extra_runtime_option option) const;
/// \brief Alter environment flags.
inline env &alter_flags(MDBX_env_flags_t flags, bool on_off);
@ -3519,6 +3623,8 @@ public:
#if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN)
env_managed(const ::std::wstring &pathname, const operate_parameters &,
bool accede = true);
explicit env_managed(const wchar_t *pathname, const operate_parameters &,
bool accede = true);
#endif /* Windows */
env_managed(const ::std::string &pathname, const operate_parameters &,
bool accede = true);
@ -3543,6 +3649,8 @@ public:
#if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN)
env_managed(const ::std::wstring &pathname, const create_parameters &,
const operate_parameters &, bool accede = true);
explicit env_managed(const wchar_t *pathname, const create_parameters &,
const operate_parameters &, bool accede = true);
#endif /* Windows */
env_managed(const ::std::string &pathname, const create_parameters &,
const operate_parameters &, bool accede = true);
@ -3879,12 +3987,31 @@ public:
//----------------------------------------------------------------------------
/// \brief Abandon all the operations of the transaction instead of saving
/// them.
/// \brief Abandon all the operations of the transaction
/// instead of saving them.
void abort();
/// \brief Commit all the operations of a transaction into the database.
void commit();
using commit_latency = MDBX_commit_latency;
/// \brief Commit all the operations of a transaction into the database
/// and collect latency information.
void commit(commit_latency *);
/// \brief Commit all the operations of a transaction into the database
/// and collect latency information.
/// \param [out] latency Object whose address is forwarded to the
/// pointer-taking commit() overload.
void commit(commit_latency &latency) { commit(&latency); }
/// \brief Commit all the operations of a transaction into the database
/// and return latency information.
/// \returns latency information of commit stages.
commit_latency commit_get_latency() {
  commit_latency stage_timings;
  // The pointer-taking overload fills the local object, which is then
  // handed back to the caller by value.
  commit(&stage_timings);
  return stage_timings;
}
};
/// \brief Unmanaged cursor.
@ -4867,6 +4994,56 @@ inline size_t env::limits::value_max(const env &env, value_mode mode) {
return value_max(env, MDBX_db_flags_t(mode));
}
inline size_t env::limits::pairsize4page_max(intptr_t pagesize,
MDBX_db_flags_t flags) {
const intptr_t result = mdbx_limits_pairsize4page_max(pagesize, flags);
if (result < 0)
MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_EINVAL);
return static_cast<size_t>(result);
}
inline size_t env::limits::pairsize4page_max(intptr_t pagesize,
value_mode mode) {
return pairsize4page_max(pagesize, MDBX_db_flags_t(mode));
}
inline size_t env::limits::pairsize4page_max(const env &env,
MDBX_db_flags_t flags) {
const intptr_t result = mdbx_env_get_pairsize4page_max(env, flags);
if (result < 0)
MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_EINVAL);
return static_cast<size_t>(result);
}
inline size_t env::limits::pairsize4page_max(const env &env, value_mode mode) {
return pairsize4page_max(env, MDBX_db_flags_t(mode));
}
inline size_t env::limits::valsize4page_max(intptr_t pagesize,
MDBX_db_flags_t flags) {
const intptr_t result = mdbx_limits_valsize4page_max(pagesize, flags);
if (result < 0)
MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_EINVAL);
return static_cast<size_t>(result);
}
inline size_t env::limits::valsize4page_max(intptr_t pagesize,
value_mode mode) {
return valsize4page_max(pagesize, MDBX_db_flags_t(mode));
}
inline size_t env::limits::valsize4page_max(const env &env,
MDBX_db_flags_t flags) {
const intptr_t result = mdbx_env_get_valsize4page_max(env, flags);
if (result < 0)
MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_EINVAL);
return static_cast<size_t>(result);
}
inline size_t env::limits::valsize4page_max(const env &env, value_mode mode) {
return valsize4page_max(env, MDBX_db_flags_t(mode));
}
inline size_t env::limits::transaction_size_max(intptr_t pagesize) {
const intptr_t result = mdbx_limits_txnsize_max(pagesize);
if (result < 0)
@ -4961,13 +5138,53 @@ inline env &env::set_sync_threshold(size_t bytes) {
return *this;
}
inline env &env::set_sync_period(unsigned seconds_16dot16) {
inline size_t env::sync_threshold() const {
size_t bytes;
error::success_or_throw(::mdbx_env_get_syncbytes(handle_, &bytes));
return bytes;
}
// Passes the period, already expressed in 1/65536-second units, to
// ::mdbx_env_set_syncperiod(); the status goes through
// error::success_or_throw(). Returns *this to allow call chaining.
inline env &env::set_sync_period__seconds_16dot16(unsigned seconds_16dot16) {
  const int status = ::mdbx_env_set_syncperiod(handle_, seconds_16dot16);
  error::success_or_throw(status);
  return *this;
}
inline env &env::set_sync_period(double seconds) {
return set_sync_period(unsigned(seconds * 65536));
inline unsigned env::sync_period__seconds_16dot16() const {
unsigned seconds_16dot16;
error::success_or_throw(::mdbx_env_get_syncperiod(handle_, &seconds_16dot16));
return seconds_16dot16;
}
// Scales the period from seconds to 1/65536-second units (multiply by 65536,
// convert to unsigned) and forwards it to the 16.16 setter.
// NOTE(review): the value is not range-checked here; negative or very large
// inputs reach the double-to-unsigned conversion as-is -- confirm callers.
inline env &env::set_sync_period__seconds_double(double seconds) {
  const unsigned period_16dot16 = unsigned(seconds * 65536);
  return set_sync_period__seconds_16dot16(period_16dot16);
}
// Reads the period in 1/65536-second units and divides by 65536.0 to
// express it in seconds.
inline double env::sync_period__seconds_double() const {
  const unsigned period_16dot16 = sync_period__seconds_16dot16();
  return period_16dot16 / 65536.0;
}
#if __cplusplus >= 201103L
// Duration-based setter: the tick count of `period` is forwarded unchanged
// to the 16.16 setter.
// NOTE(review): this presumes one tick of `duration` equals 1/65536 s --
// confirm against the `duration` typedef.
inline env &env::set_sync_period(const duration &period) {
  const auto ticks = period.count();
  return set_sync_period__seconds_16dot16(ticks);
}
// Duration-based getter: wraps the raw 16.16 period value in a `duration`
// constructed directly from that tick count.
inline duration env::sync_period() const {
  const unsigned ticks = sync_period__seconds_16dot16();
  return duration(ticks);
}
#endif
// Casts the scoped option to ::MDBX_option_t and applies `value` through
// ::mdbx_env_set_option(); the status goes through
// error::success_or_throw(). Returns *this to allow call chaining.
inline env &env::set_extra_option(enum env::extra_runtime_option option,
                                  uint64_t value) {
  const int status =
      ::mdbx_env_set_option(handle_, ::MDBX_option_t(option), value);
  error::success_or_throw(status);
  return *this;
}
// Casts the scoped option to ::MDBX_option_t and queries it through
// ::mdbx_env_get_option(); the status goes through
// error::success_or_throw() before the fetched value is returned.
inline uint64_t env::extra_option(enum env::extra_runtime_option option) const {
  uint64_t current;
  const int status =
      ::mdbx_env_get_option(handle_, ::MDBX_option_t(option), &current);
  error::success_or_throw(status);
  return current;
}
inline env &env::alter_flags(MDBX_env_flags_t flags, bool on_off) {

View File

@ -1,5 +1,5 @@
/*
* Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*

View File

@ -1,5 +1,5 @@
/*
* Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -63,7 +63,7 @@
#define SSIZE_MAX INTPTR_MAX
#endif
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul
#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul || defined(_WIN64)
#define MDBX_WORDBITS 64
#else
#define MDBX_WORDBITS 32
@ -259,8 +259,10 @@ __extern_C key_t ftok(const char *, int);
#include <sys/ipc.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/time.h>
#include <sys/uio.h>
#endif /*---------------------------------------------------------------------*/
@ -302,8 +304,9 @@ __extern_C key_t ftok(const char *, int);
/* LY: define neutral __ia32__ for x86 and x86-64 */
#define __ia32__ 1
#endif /* __ia32__ */
#if !defined(__amd64__) && (defined(__x86_64) || defined(__x86_64__) || \
defined(__amd64) || defined(_M_X64))
#if !defined(__amd64__) && \
(defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || \
defined(_M_X64) || defined(_M_AMD64))
/* LY: define trusty __amd64__ for all AMD64/x86-64 arch */
#define __amd64__ 1
#endif /* __amd64__ */
@ -371,18 +374,50 @@ __extern_C key_t ftok(const char *, int);
#endif
#endif /* __BYTE_ORDER__ || __ORDER_LITTLE_ENDIAN__ || __ORDER_BIG_ENDIAN__ */
/*----------------------------------------------------------------------------*/
/* Availability of CMOV or equivalent.
 *
 * MDBX_HAVE_CMOV is set to 1 or 0 from the target-architecture macros tested
 * below. A definition made earlier (e.g. by the build system) is kept as is.
 * The branches are evaluated top-down, so their order is significant. */
#ifndef MDBX_HAVE_CMOV
#if defined(__e2k__)
#define MDBX_HAVE_CMOV 1
#elif defined(__thumb2__) || defined(__thumb2)
#define MDBX_HAVE_CMOV 1
#elif defined(__thumb__) || defined(__thumb) || defined(__TARGET_ARCH_THUMB)
/* Thumb without the Thumb-2 macros: tested ahead of the generic ARM branch
 * below, so such targets get 0. */
#define MDBX_HAVE_CMOV 0
#elif defined(_M_ARM) || defined(_M_ARM64) || defined(__aarch64__) || \
defined(__aarch64) || defined(__arm__) || defined(__arm) || \
defined(__CC_ARM)
#define MDBX_HAVE_CMOV 1
#elif (defined(__riscv__) || defined(__riscv64)) && \
(defined(__riscv_b) || defined(__riscv_bitmanip))
/* RISC-V: only when one of the bit-manipulation macros is defined too. */
#define MDBX_HAVE_CMOV 1
#elif defined(i686) || defined(__i686) || defined(__i686__) || \
(defined(_M_IX86) && _M_IX86 > 600) || defined(__x86_64) || \
defined(__x86_64__) || defined(__amd64__) || defined(__amd64) || \
defined(_M_X64) || defined(_M_AMD64)
/* x86: i686-class macros, _M_IX86 above 600, or any x86-64 macro. */
#define MDBX_HAVE_CMOV 1
#else
/* No known macro matched: treat as unavailable. */
#define MDBX_HAVE_CMOV 0
#endif
#endif /* MDBX_HAVE_CMOV */
/*----------------------------------------------------------------------------*/
/* Compiler's includes for builtins/intrinsics */
#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
#include <intrin.h>
#elif __GNUC_PREREQ(4, 4) || defined(__clang__)
#if defined(__ia32__) || defined(__e2k__)
#if defined(__e2k__)
#include <e2kintrin.h>
#include <x86intrin.h>
#endif /* __ia32__ */
#endif /* __e2k__ */
#if defined(__ia32__)
#include <cpuid.h>
#include <x86intrin.h>
#endif /* __ia32__ */
#ifdef __ARM_NEON
#include <arm_neon.h>
#endif
#elif defined(__SUNPRO_C) || defined(__sun) || defined(sun)
#include <mbarrier.h>
#elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) && \
@ -603,6 +638,16 @@ __extern_C key_t ftok(const char *, int);
#endif
#endif /* __anonymous_struct_extension__ */
/* expect_with_probability(expr, value, prob): forwards all three arguments to
 * __builtin_expect_with_probability() when the builtin is detected (defined
 * as a macro, reported by __has_builtin, or __GNUC_PREREQ(9, 0) holds).
 * Otherwise it expands to plain (expr), dropping `value` and `prob`.
 * A definition supplied earlier is left untouched. */
#ifndef expect_with_probability
#if defined(__builtin_expect_with_probability) || \
__has_builtin(__builtin_expect_with_probability) || __GNUC_PREREQ(9, 0)
#define expect_with_probability(expr, value, prob) \
__builtin_expect_with_probability(expr, value, prob)
#else
#define expect_with_probability(expr, value, prob) (expr)
#endif
#endif /* expect_with_probability */
#ifndef MDBX_WEAK_IMPORT_ATTRIBUTE
#ifdef WEAK_IMPORT_ATTRIBUTE
#define MDBX_WEAK_IMPORT_ATTRIBUTE WEAK_IMPORT_ATTRIBUTE
@ -616,6 +661,28 @@ __extern_C key_t ftok(const char *, int);
#endif
#endif /* MDBX_WEAK_IMPORT_ATTRIBUTE */
/* MDBX_GOOFY_MSVC_STATIC_ANALYZER defaults to 1 when _PREFAST_ is defined and
 * to 0 otherwise; a definition made earlier is kept as is. */
#ifndef MDBX_GOOFY_MSVC_STATIC_ANALYZER
#ifdef _PREFAST_
#define MDBX_GOOFY_MSVC_STATIC_ANALYZER 1
#else
#define MDBX_GOOFY_MSVC_STATIC_ANALYZER 0
#endif
#endif /* MDBX_GOOFY_MSVC_STATIC_ANALYZER */
/* Static-analysis helpers.
 *
 * MDBX_ANALYSIS_ASSUME(expr) maps to __analysis_assume(expr) when the
 * analyzer flag is set or _MSC_VER is above 1919, and to assert(expr)
 * otherwise.
 *
 * MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(warn_id) expands to a
 * `prefast(suppress)` pragma under _PREFAST_, to a `warning(suppress)`
 * pragma for the remaining builds taking the first branch, and to nothing
 * in the fallback branch. */
#if MDBX_GOOFY_MSVC_STATIC_ANALYZER || (defined(_MSC_VER) && _MSC_VER > 1919)
#define MDBX_ANALYSIS_ASSUME(expr) __analysis_assume(expr)
#ifdef _PREFAST_
#define MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(warn_id) \
__pragma(prefast(suppress : warn_id))
#else
#define MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(warn_id) \
__pragma(warning(suppress : warn_id))
#endif
#else
#define MDBX_ANALYSIS_ASSUME(expr) assert(expr)
#define MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(warn_id)
#endif /* MDBX_GOOFY_MSVC_STATIC_ANALYZER */
/*----------------------------------------------------------------------------*/
#if defined(MDBX_USE_VALGRIND)

View File

@ -1,11 +1,11 @@
N | MASK | ENV | TXN | DB | PUT | DBI | NODE | PAGE | MRESIZE |
--|---------|-----------|--------------|----------|-----------|------------|---------|----------|---------|
0 |0000 0001|ALLOC_CACHE|TXN_FINISHED | | |DBI_DIRTY |F_BIGDATA|P_BRANCH | |
1 |0000 0002|ALLOC_GC |TXN_ERROR |REVERSEKEY|F_SUBDATA |DBI_STALE |F_SUBDATA|P_LEAF | |
2 |0000 0004|ALLOC_NEW |TXN_DIRTY |DUPSORT | |DBI_FRESH |F_DUPDATA|P_OVERFLOW| |
3 |0000 0008|ALLOC_SLOT |TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META | |
4 |0000 0010|ALLOC_FAKE |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_BAD | |
5 |0000 0020| | |INTEGERDUP|NODUPDATA |DBI_USRVALID| |P_LEAF2 | |
0 |0000 0001|ALLOC_RSRV |TXN_FINISHED | | |DBI_DIRTY |F_BIGDATA|P_BRANCH | |
1 |0000 0002|ALLOC_UNIMP|TXN_ERROR |REVERSEKEY|F_SUBDATA |DBI_STALE |F_SUBDATA|P_LEAF | |
2 |0000 0004|ALLOC_COLSC|TXN_DIRTY |DUPSORT | |DBI_FRESH |F_DUPDATA|P_OVERFLOW| |
3 |0000 0008|ALLOC_SSCAN|TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META | |
4 |0000 0010|ALLOC_FIFO |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_BAD | |
5 |0000 0020| |TXN_DRAINED_GC|INTEGERDUP|NODUPDATA |DBI_USRVALID| |P_LEAF2 | |
6 |0000 0040| | |REVERSEDUP|CURRENT |DBI_DUPDATA | |P_SUBP | |
7 |0000 0080| | | |ALLDUPS |DBI_AUDITED | | | |
8 |0000 0100| _MAY_MOVE | | | | | | | <= |
@ -13,9 +13,9 @@ N | MASK | ENV | TXN | DB | PUT | DBI | NOD
10|0000 0400| | | | | | | | |
11|0000 0800| | | | | | | | |
12|0000 1000| | | | | | | | |
13|0000 2000| | | | | | |P_SPILLED | |
13|0000 2000|VALIDATION | | | | | |P_SPILLED | |
14|0000 4000|NOSUBDIR | | | | | |P_LOOSE | |
15|0000 8000| | |DB_VALID |NOSPILL | | |P_FROZEN | |
15|0000 8000| | |DB_VALID | | | |P_FROZEN | |
16|0001 0000|SAFE_NOSYNC|TXN_NOSYNC | |RESERVE | |RESERVE | | |
17|0002 0000|RDONLY |TXN_RDONLY | |APPEND | |APPEND | | <= |
18|0004 0000|NOMETASYNC |TXN_NOMETASYNC|CREATE |APPENDDUP | | | | |

View File

@ -26,7 +26,13 @@
#ifndef MDBX_TRUST_RTC_AUTO
#cmakedefine01 MDBX_TRUST_RTC
#endif
#cmakedefine01 MDBX_DISABLE_PAGECHECKS
#cmakedefine01 MDBX_DISABLE_VALIDATION
#cmakedefine01 MDBX_AVOID_MSYNC
#cmakedefine01 MDBX_ENABLE_REFUND
#cmakedefine01 MDBX_ENABLE_MADVISE
#cmakedefine01 MDBX_ENABLE_BIGFOOT
#cmakedefine01 MDBX_ENABLE_PGOP_STAT
#cmakedefine01 MDBX_ENABLE_PROFGC
/* Windows */
#cmakedefine01 MDBX_WITHOUT_MSVC_CRT

17649
src/core.c

File diff suppressed because it is too large Load Diff

View File

@ -1,42 +1,42 @@
#if defined(__GNUC__) && !defined(__LCC__)
#pragma push_macro("mdbx_trace")
#pragma push_macro("mdbx_debug")
#pragma push_macro("mdbx_verbose")
#pragma push_macro("mdbx_notice")
#pragma push_macro("mdbx_warning")
#pragma push_macro("mdbx_error")
#pragma push_macro("mdbx_assert")
#pragma push_macro("TRACE")
#pragma push_macro("DEBUG")
#pragma push_macro("VERBOSE")
#pragma push_macro("NOTICE")
#pragma push_macro("WARNING")
#pragma push_macro("ERROR")
#pragma push_macro("eASSERT")
#undef mdbx_trace
#define mdbx_trace(fmt, ...) \
mdbx_debug_log(MDBX_LOG_TRACE, __func__, __LINE__, fmt "\n", __VA_ARGS__)
#undef TRACE
#define TRACE(fmt, ...) \
debug_log(MDBX_LOG_TRACE, __func__, __LINE__, fmt "\n", __VA_ARGS__)
#undef mdbx_debug
#define mdbx_debug(fmt, ...) \
mdbx_debug_log(MDBX_LOG_DEBUG, __func__, __LINE__, fmt "\n", __VA_ARGS__)
#undef DEBUG
#define DEBUG(fmt, ...) \
debug_log(MDBX_LOG_DEBUG, __func__, __LINE__, fmt "\n", __VA_ARGS__)
#undef mdbx_verbose
#define mdbx_verbose(fmt, ...) \
mdbx_debug_log(MDBX_LOG_VERBOSE, __func__, __LINE__, fmt "\n", __VA_ARGS__)
#undef VERBOSE
#define VERBOSE(fmt, ...) \
debug_log(MDBX_LOG_VERBOSE, __func__, __LINE__, fmt "\n", __VA_ARGS__)
#undef mdbx_notice
#define mdbx_notice(fmt, ...) \
mdbx_debug_log(MDBX_LOG_NOTICE, __func__, __LINE__, fmt "\n", __VA_ARGS__)
#undef NOTICE
#define NOTICE(fmt, ...) \
debug_log(MDBX_LOG_NOTICE, __func__, __LINE__, fmt "\n", __VA_ARGS__)
#undef mdbx_warning
#define mdbx_warning(fmt, ...) \
mdbx_debug_log(MDBX_LOG_WARN, __func__, __LINE__, fmt "\n", __VA_ARGS__)
#undef WARNING
#define WARNING(fmt, ...) \
debug_log(MDBX_LOG_WARN, __func__, __LINE__, fmt "\n", __VA_ARGS__)
#undef mdbx_error
#define mdbx_error(fmt, ...) \
mdbx_debug_log(MDBX_LOG_ERROR, __func__, __LINE__, fmt "\n", __VA_ARGS__)
#undef ERROR
#define ERROR(fmt, ...) \
debug_log(MDBX_LOG_ERROR, __func__, __LINE__, fmt "\n", __VA_ARGS__)
#undef mdbx_assert
#define mdbx_assert(env, expr) mdbx_ensure(env, expr)
#undef eASSERT
#define eASSERT(env, expr) ENSURE(env, expr)
#if !defined(__clang__)
#pragma GCC optimize("-O0")
#pragma GCC optimize("-Og")
#endif
#endif /* GCC only */

View File

@ -1,12 +1,12 @@
#if defined(__GNUC__) && !defined(__LCC__)
#pragma pop_macro("mdbx_trace")
#pragma pop_macro("mdbx_debug")
#pragma pop_macro("mdbx_verbose")
#pragma pop_macro("mdbx_notice")
#pragma pop_macro("mdbx_warning")
#pragma pop_macro("mdbx_error")
#pragma pop_macro("mdbx_assert")
#pragma pop_macro("TRACE")
#pragma pop_macro("DEBUG")
#pragma pop_macro("VERBOSE")
#pragma pop_macro("NOTICE")
#pragma pop_macro("WARNING")
#pragma pop_macro("ERROR")
#pragma pop_macro("eASSERT")
#if !defined(__clang__)
#pragma GCC reset_options

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/*
* Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -28,10 +28,11 @@
#include <sys/utsname.h>
#ifndef xMDBX_ALLOY
uint32_t mdbx_linux_kernel_version;
uint32_t linux_kernel_version;
bool mdbx_RunningOnWSL1;
#endif /* xMDBX_ALLOY */
MDBX_EXCLUDE_FOR_GPROF
__cold static uint8_t probe_for_WSL(const char *tag) {
const char *const WSL = strstr(tag, "WSL");
if (WSL && WSL[3] >= '2' && WSL[3] <= '9')
@ -42,14 +43,28 @@ __cold static uint8_t probe_for_WSL(const char *tag) {
if (WSL || wsl || strcasestr(tag, "Microsoft"))
/* Expecting no new kernel within WSL1, either it will explicitly
* marked by an appropriate WSL-version hint. */
return (mdbx_linux_kernel_version < /* 4.19.x */ 0x04130000) ? 1 : 2;
return (linux_kernel_version < /* 4.19.x */ 0x04130000) ? 1 : 2;
return 0;
}
#endif /* Linux */
#ifdef ENABLE_GPROF
extern void _mcleanup(void);
extern void monstartup(unsigned long, unsigned long);
extern void _init(void);
extern void _fini(void);
extern void __gmon_start__(void) __attribute__((__weak__));
#endif /* ENABLE_GPROF */
MDBX_EXCLUDE_FOR_GPROF
__cold static __attribute__((__constructor__)) void
mdbx_global_constructor(void) {
#ifdef ENABLE_GPROF
if (!&__gmon_start__)
monstartup((uintptr_t)&_init, (uintptr_t)&_fini);
#endif /* ENABLE_GPROF */
#if defined(__linux__) || defined(__gnu_linux__)
struct utsname buffer;
if (uname(&buffer) == 0) {
@ -61,7 +76,7 @@ mdbx_global_constructor(void) {
if (number > 0) {
if (number > 255)
number = 255;
mdbx_linux_kernel_version += number << (24 - i * 8);
linux_kernel_version += number << (24 - i * 8);
}
++i;
} else {
@ -81,12 +96,17 @@ mdbx_global_constructor(void) {
}
#endif /* Linux */
mdbx_rthc_global_init();
global_ctor();
}
MDBX_EXCLUDE_FOR_GPROF
__cold static __attribute__((__destructor__)) void
mdbx_global_destructor(void) {
mdbx_rthc_global_dtor();
global_dtor();
#ifdef ENABLE_GPROF
if (!&__gmon_start__)
_mcleanup();
#endif /* ENABLE_GPROF */
}
/*----------------------------------------------------------------------------*/
@ -98,15 +118,15 @@ mdbx_global_destructor(void) {
* размещаются совместно используемые posix-мьютексы (futex). Посредством
* этих мьютексов (см struct MDBX_lockinfo) реализуются:
* - Блокировка таблицы читателей для регистрации,
* т.е. функции mdbx_rdt_lock() и mdbx_rdt_unlock().
* т.е. функции osal_rdt_lock() и osal_rdt_unlock().
* - Блокировка БД для пишущих транзакций,
* т.е. функции mdbx_txn_lock() и mdbx_txn_unlock().
*
* Остальной функционал реализуется отдельно посредством файловых блокировок:
* - Первоначальный захват БД в режиме exclusive/shared и последующий перевод
* в операционный режим, функции mdbx_lck_seize() и mdbx_lck_downgrade().
* в операционный режим, функции osal_lck_seize() и osal_lck_downgrade().
* - Проверка присутствие процессов-читателей,
* т.е. функции mdbx_rpid_set(), mdbx_rpid_clear() и mdbx_rpid_check().
* т.е. функции osal_rpid_set(), osal_rpid_clear() и osal_rpid_check().
*
* Для блокировки файлов используется fcntl(F_SETLK), так как:
* - lockf() оперирует только эксклюзивной блокировкой и требует
@ -150,9 +170,9 @@ mdbx_global_destructor(void) {
static int op_setlk, op_setlkw, op_getlk;
__cold static void choice_fcntl(void) {
assert(!op_setlk && !op_setlkw && !op_getlk);
if ((mdbx_runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0
if ((runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0
#if defined(__linux__) || defined(__gnu_linux__)
&& mdbx_linux_kernel_version >
&& linux_kernel_version >
0x030f0000 /* OFD locks are available since 3.15, but engages here
only for 3.16 and later kernels (i.e. LTS) because
of reliability reasons */
@ -182,7 +202,7 @@ static int lck_op(const mdbx_filehandle_t fd, int cmd, const int lck,
"The bitness of system `off_t` type is mismatch. Please "
"fix build and/or NDK configuration.");
#endif /* Android */
mdbx_jitter4testing(true);
jitter4testing(true);
assert(offset >= 0 && len > 0);
assert((uint64_t)offset < (uint64_t)INT64_MAX &&
(uint64_t)len < (uint64_t)INT64_MAX &&
@ -208,7 +228,7 @@ static int lck_op(const mdbx_filehandle_t fd, int cmd, const int lck,
lock_op.l_start = offset;
lock_op.l_len = len;
int rc = MDBX_FCNTL(fd, cmd, &lock_op);
mdbx_jitter4testing(true);
jitter4testing(true);
if (rc != -1) {
if (cmd == op_getlk) {
/* Checks reader by pid. Returns:
@ -243,7 +263,7 @@ static int lck_op(const mdbx_filehandle_t fd, int cmd, const int lck,
}
}
MDBX_INTERNAL_FUNC int mdbx_lockfile(mdbx_filehandle_t fd, bool wait) {
MDBX_INTERNAL_FUNC int osal_lockfile(mdbx_filehandle_t fd, bool wait) {
#if MDBX_USE_OFDLOCKS
if (unlikely(op_setlk == 0))
choice_fcntl();
@ -251,21 +271,21 @@ MDBX_INTERNAL_FUNC int mdbx_lockfile(mdbx_filehandle_t fd, bool wait) {
return lck_op(fd, wait ? op_setlkw : op_setlk, F_WRLCK, 0, OFF_T_MAX);
}
MDBX_INTERNAL_FUNC int mdbx_rpid_set(MDBX_env *env) {
MDBX_INTERNAL_FUNC int osal_rpid_set(MDBX_env *env) {
assert(env->me_lfd != INVALID_HANDLE_VALUE);
assert(env->me_pid > 0);
if (unlikely(mdbx_getpid() != env->me_pid))
if (unlikely(osal_getpid() != env->me_pid))
return MDBX_PANIC;
return lck_op(env->me_lfd, op_setlk, F_WRLCK, env->me_pid, 1);
}
MDBX_INTERNAL_FUNC int mdbx_rpid_clear(MDBX_env *env) {
MDBX_INTERNAL_FUNC int osal_rpid_clear(MDBX_env *env) {
assert(env->me_lfd != INVALID_HANDLE_VALUE);
assert(env->me_pid > 0);
return lck_op(env->me_lfd, op_setlk, F_UNLCK, env->me_pid, 1);
}
MDBX_INTERNAL_FUNC int mdbx_rpid_check(MDBX_env *env, uint32_t pid) {
MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid) {
assert(env->me_lfd != INVALID_HANDLE_VALUE);
assert(pid > 0);
return lck_op(env->me_lfd, op_getlk, F_WRLCK, pid, 1);
@ -274,7 +294,7 @@ MDBX_INTERNAL_FUNC int mdbx_rpid_check(MDBX_env *env, uint32_t pid) {
/*---------------------------------------------------------------------------*/
#if MDBX_LOCKING > MDBX_LOCKING_SYSV
MDBX_INTERNAL_FUNC int mdbx_ipclock_stub(mdbx_ipclock_t *ipc) {
MDBX_INTERNAL_FUNC int osal_ipclock_stub(osal_ipclock_t *ipc) {
#if MDBX_LOCKING == MDBX_LOCKING_POSIX1988
return sem_init(ipc, false, 1) ? errno : 0;
#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \
@ -285,7 +305,7 @@ MDBX_INTERNAL_FUNC int mdbx_ipclock_stub(mdbx_ipclock_t *ipc) {
#endif
}
MDBX_INTERNAL_FUNC int mdbx_ipclock_destroy(mdbx_ipclock_t *ipc) {
MDBX_INTERNAL_FUNC int osal_ipclock_destroy(osal_ipclock_t *ipc) {
#if MDBX_LOCKING == MDBX_LOCKING_POSIX1988
return sem_destroy(ipc) ? errno : 0;
#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \
@ -303,7 +323,7 @@ static int check_fstat(MDBX_env *env) {
int rc = MDBX_SUCCESS;
if (fstat(env->me_lazy_fd, &st)) {
rc = errno;
mdbx_error("fstat(%s), err %d", "DXB", rc);
ERROR("fstat(%s), err %d", "DXB", rc);
return rc;
}
@ -313,14 +333,13 @@ static int check_fstat(MDBX_env *env) {
#else
rc = EPERM;
#endif
mdbx_error("%s %s, err %d", "DXB",
(st.st_nlink < 1) ? "file was removed" : "not a regular file",
rc);
ERROR("%s %s, err %d", "DXB",
(st.st_nlink < 1) ? "file was removed" : "not a regular file", rc);
return rc;
}
if (st.st_size < (off_t)(MDBX_MIN_PAGESIZE * NUM_METAS)) {
mdbx_verbose("dxb-file is too short (%u), exclusive-lock needed",
VERBOSE("dxb-file is too short (%u), exclusive-lock needed",
(unsigned)st.st_size);
rc = MDBX_RESULT_TRUE;
}
@ -329,7 +348,7 @@ static int check_fstat(MDBX_env *env) {
if (fstat(env->me_lfd, &st)) {
rc = errno;
mdbx_error("fstat(%s), err %d", "LCK", rc);
ERROR("fstat(%s), err %d", "LCK", rc);
return rc;
}
@ -339,16 +358,15 @@ static int check_fstat(MDBX_env *env) {
#else
rc = EPERM;
#endif
mdbx_error("%s %s, err %d", "LCK",
(st.st_nlink < 1) ? "file was removed" : "not a regular file",
rc);
ERROR("%s %s, err %d", "LCK",
(st.st_nlink < 1) ? "file was removed" : "not a regular file", rc);
return rc;
}
/* Checking file size for detect the situation when we got the shared lock
* immediately after mdbx_lck_destroy(). */
* immediately after osal_lck_destroy(). */
if (st.st_size < (off_t)(sizeof(MDBX_lockinfo) + sizeof(MDBX_reader))) {
mdbx_verbose("lck-file is too short (%u), exclusive-lock needed",
VERBOSE("lck-file is too short (%u), exclusive-lock needed",
(unsigned)st.st_size);
rc = MDBX_RESULT_TRUE;
}
@ -356,9 +374,9 @@ static int check_fstat(MDBX_env *env) {
return rc;
}
__cold MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env) {
__cold MDBX_INTERNAL_FUNC int osal_lck_seize(MDBX_env *env) {
assert(env->me_lazy_fd != INVALID_HANDLE_VALUE);
if (unlikely(mdbx_getpid() != env->me_pid))
if (unlikely(osal_getpid() != env->me_pid))
return MDBX_PANIC;
#if MDBX_USE_OFDLOCKS
if (unlikely(op_setlk == 0))
@ -369,7 +387,7 @@ __cold MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env) {
#if defined(__linux__) || defined(__gnu_linux__)
if (unlikely(mdbx_RunningOnWSL1)) {
rc = ENOLCK /* No record locks available */;
mdbx_error("%s, err %u",
ERROR("%s, err %u",
"WSL1 (Windows Subsystem for Linux) is mad and trouble-full, "
"injecting failure to avoid data loss",
rc);
@ -383,8 +401,8 @@ __cold MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env) {
lck_op(env->me_lazy_fd, op_setlk,
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX);
if (rc != MDBX_SUCCESS) {
mdbx_error("%s, err %u", "without-lck", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
ERROR("%s, err %u", "without-lck", rc);
eASSERT(env, MDBX_IS_ERROR(rc));
return rc;
}
return MDBX_RESULT_TRUE /* Done: return with exclusive locking. */;
@ -397,8 +415,8 @@ retry:
if (rc == MDBX_RESULT_TRUE) {
rc = lck_op(env->me_lfd, op_setlk, F_UNLCK, 0, 1);
if (rc != MDBX_SUCCESS) {
mdbx_error("%s, err %u", "unlock-before-retry", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
ERROR("%s, err %u", "unlock-before-retry", rc);
eASSERT(env, MDBX_IS_ERROR(rc));
return rc;
}
}
@ -424,23 +442,23 @@ retry:
/* the cause may be a collision with POSIX's file-lock recovery. */
if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK ||
rc == EDEADLK)) {
mdbx_error("%s, err %u", "dxb-exclusive", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
ERROR("%s, err %u", "dxb-exclusive", rc);
eASSERT(env, MDBX_IS_ERROR(rc));
return rc;
}
/* Fallback to lck-shared */
} else if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY ||
rc == EWOULDBLOCK || rc == EDEADLK)) {
mdbx_error("%s, err %u", "try-exclusive", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
ERROR("%s, err %u", "try-exclusive", rc);
eASSERT(env, MDBX_IS_ERROR(rc));
return rc;
}
/* Here could be one of two:
* - mdbx_lck_destroy() from the another process was hold the lock
* - osal_lck_destroy() from the another process was hold the lock
* during a destruction.
* - either mdbx_lck_seize() from the another process was got the exclusive
* - either osal_lck_seize() from the another process was got the exclusive
* lock and doing initialization.
* For distinguish these cases will use size of the lck-file later. */
@ -449,8 +467,8 @@ retry:
* competing process doesn't call lck_downgrade(). */
rc = lck_op(env->me_lfd, op_setlkw, F_RDLCK, 0, 1);
if (rc != MDBX_SUCCESS) {
mdbx_error("%s, err %u", "try-shared", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
ERROR("%s, err %u", "try-shared", rc);
eASSERT(env, MDBX_IS_ERROR(rc));
return rc;
}
@ -458,7 +476,7 @@ retry:
if (rc == MDBX_RESULT_TRUE)
goto retry;
if (rc != MDBX_SUCCESS) {
mdbx_error("%s, err %u", "lck_fstat", rc);
ERROR("%s, err %u", "lck_fstat", rc);
return rc;
}
@ -469,8 +487,8 @@ retry:
if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK ||
rc == EDEADLK)) {
mdbx_error("%s, err %u", "try-exclusive", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
ERROR("%s, err %u", "try-exclusive", rc);
eASSERT(env, MDBX_IS_ERROR(rc));
return rc;
}
@ -479,8 +497,8 @@ retry:
lck_op(env->me_lazy_fd, op_setlk,
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, env->me_pid, 1);
if (rc != MDBX_SUCCESS) {
mdbx_error("%s, err %u", "lock-against-without-lck", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
ERROR("%s, err %u", "lock-against-without-lck", rc);
eASSERT(env, MDBX_IS_ERROR(rc));
return rc;
}
@ -488,9 +506,9 @@ retry:
return MDBX_RESULT_FALSE;
}
MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env) {
MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env) {
assert(env->me_lfd != INVALID_HANDLE_VALUE);
if (unlikely(mdbx_getpid() != env->me_pid))
if (unlikely(osal_getpid() != env->me_pid))
return MDBX_PANIC;
int rc = MDBX_SUCCESS;
@ -503,15 +521,15 @@ MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env) {
if (rc == MDBX_SUCCESS)
rc = lck_op(env->me_lfd, op_setlk, F_RDLCK, 0, 1);
if (unlikely(rc != 0)) {
mdbx_error("%s, err %u", "lck", rc);
ERROR("%s, err %u", "lck", rc);
assert(MDBX_IS_ERROR(rc));
}
return rc;
}
__cold MDBX_INTERNAL_FUNC int mdbx_lck_destroy(MDBX_env *env,
__cold MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env,
MDBX_env *inprocess_neighbor) {
if (unlikely(mdbx_getpid() != env->me_pid))
if (unlikely(osal_getpid() != env->me_pid))
return MDBX_PANIC;
int rc = MDBX_SUCCESS;
@ -526,25 +544,25 @@ __cold MDBX_INTERNAL_FUNC int mdbx_lck_destroy(MDBX_env *env,
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0,
OFF_T_MAX) == 0) {
mdbx_verbose("%p got exclusive, drown locks", (void *)env);
VERBOSE("%p got exclusive, drown locks", (void *)env);
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
if (env->me_sysv_ipc.semid != -1)
rc = semctl(env->me_sysv_ipc.semid, 2, IPC_RMID) ? errno : 0;
#else
rc = mdbx_ipclock_destroy(&lck->mti_rlock);
rc = osal_ipclock_destroy(&lck->mti_rlock);
if (rc == 0)
rc = mdbx_ipclock_destroy(&lck->mti_wlock);
rc = osal_ipclock_destroy(&lck->mti_wlock);
#endif /* MDBX_LOCKING */
mdbx_assert(env, rc == 0);
eASSERT(env, rc == 0);
if (rc == 0) {
const bool synced = lck->mti_unsynced_pages.weak == 0;
mdbx_munmap(&env->me_lck_mmap);
osal_munmap(&env->me_lck_mmap);
if (synced)
rc = ftruncate(env->me_lfd, 0) ? errno : 0;
}
mdbx_jitter4testing(false);
jitter4testing(false);
}
/* 1) POSIX's fcntl() locks (i.e. when op_setlk == F_SETLK) should be restored
@ -585,7 +603,7 @@ __cold MDBX_INTERNAL_FUNC int mdbx_lck_destroy(MDBX_env *env,
/* restore file-locks */
rc = lck_op(inprocess_neighbor->me_lfd, F_SETLKW, F_RDLCK, 0, 1);
if (rc == MDBX_SUCCESS && inprocess_neighbor->me_live_reader)
rc = mdbx_rpid_set(inprocess_neighbor);
rc = osal_rpid_set(inprocess_neighbor);
}
}
@ -596,7 +614,7 @@ __cold MDBX_INTERNAL_FUNC int mdbx_lck_destroy(MDBX_env *env,
/*---------------------------------------------------------------------------*/
__cold MDBX_INTERNAL_FUNC int mdbx_lck_init(MDBX_env *env,
__cold MDBX_INTERNAL_FUNC int osal_lck_init(MDBX_env *env,
MDBX_env *inprocess_neighbor,
int global_uniqueness_flag) {
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
@ -743,7 +761,7 @@ bailout:
#endif /* MDBX_LOCKING > 0 */
}
__cold static int mdbx_ipclock_failed(MDBX_env *env, mdbx_ipclock_t *ipc,
__cold static int mdbx_ipclock_failed(MDBX_env *env, osal_ipclock_t *ipc,
const int err) {
int rc = err;
#if MDBX_LOCKING == MDBX_LOCKING_POSIX2008 || MDBX_LOCKING == MDBX_LOCKING_SYSV
@ -760,10 +778,10 @@ __cold static int mdbx_ipclock_failed(MDBX_env *env, mdbx_ipclock_t *ipc,
rc = MDBX_PANIC;
}
}
mdbx_warning("%clock owner died, %s", (rlocked ? 'r' : 'w'),
WARNING("%clock owner died, %s", (rlocked ? 'r' : 'w'),
(rc ? "this process' env is hosed" : "recovering"));
int check_rc = mdbx_cleanup_dead_readers(env, rlocked, NULL);
int check_rc = cleanup_dead_readers(env, rlocked, NULL);
check_rc = (check_rc == MDBX_SUCCESS) ? MDBX_RESULT_TRUE : check_rc;
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
@ -781,7 +799,7 @@ __cold static int mdbx_ipclock_failed(MDBX_env *env, mdbx_ipclock_t *ipc,
check_rc = (mreco_rc == 0) ? check_rc : mreco_rc;
if (unlikely(mreco_rc))
mdbx_error("lock recovery failed, %s", mdbx_strerror(mreco_rc));
ERROR("lock recovery failed, %s", mdbx_strerror(mreco_rc));
rc = (rc == MDBX_SUCCESS) ? check_rc : rc;
if (MDBX_IS_ERROR(rc))
@ -804,19 +822,19 @@ __cold static int mdbx_ipclock_failed(MDBX_env *env, mdbx_ipclock_t *ipc,
#error "FIXME"
#endif /* MDBX_LOCKING */
mdbx_error("mutex (un)lock failed, %s", mdbx_strerror(err));
ERROR("mutex (un)lock failed, %s", mdbx_strerror(err));
if (rc != EDEADLK)
env->me_flags |= MDBX_FATAL_ERROR;
return rc;
}
#if defined(__ANDROID_API__) || defined(ANDROID) || defined(BIONIC)
MDBX_INTERNAL_FUNC int mdbx_check_tid4bionic(void) {
MDBX_INTERNAL_FUNC int osal_check_tid4bionic(void) {
/* avoid 32-bit Bionic bug/hang with 32-bit TID */
if (sizeof(pthread_mutex_t) < sizeof(pid_t) + sizeof(unsigned)) {
pid_t tid = gettid();
if (unlikely(tid > 0xffff)) {
mdbx_fatal("Raise the ENOSYS(%d) error to avoid hang due "
FATAL("Raise the ENOSYS(%d) error to avoid hang due "
"the 32-bit Bionic/Android bug with tid/thread_id 0x%08x(%i) "
"that dont fit in 16 bits, see "
"https://android.googlesource.com/platform/bionic/+/master/"
@ -829,11 +847,11 @@ MDBX_INTERNAL_FUNC int mdbx_check_tid4bionic(void) {
}
#endif /* __ANDROID_API__ || ANDROID) || BIONIC */
static int mdbx_ipclock_lock(MDBX_env *env, mdbx_ipclock_t *ipc,
static int mdbx_ipclock_lock(MDBX_env *env, osal_ipclock_t *ipc,
const bool dont_wait) {
#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \
MDBX_LOCKING == MDBX_LOCKING_POSIX2008
int rc = mdbx_check_tid4bionic();
int rc = osal_check_tid4bionic();
if (likely(rc == 0))
rc = dont_wait ? pthread_mutex_trylock(ipc) : pthread_mutex_lock(ipc);
rc = (rc == EBUSY && dont_wait) ? MDBX_BUSY : rc;
@ -869,7 +887,7 @@ static int mdbx_ipclock_lock(MDBX_env *env, mdbx_ipclock_t *ipc,
return rc;
}
static int mdbx_ipclock_unlock(MDBX_env *env, mdbx_ipclock_t *ipc) {
static int mdbx_ipclock_unlock(MDBX_env *env, osal_ipclock_t *ipc) {
#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \
MDBX_LOCKING == MDBX_LOCKING_POSIX2008
int rc = pthread_mutex_unlock(ipc);
@ -891,38 +909,38 @@ static int mdbx_ipclock_unlock(MDBX_env *env, mdbx_ipclock_t *ipc) {
return rc;
}
MDBX_INTERNAL_FUNC int mdbx_rdt_lock(MDBX_env *env) {
mdbx_trace("%s", ">>");
mdbx_jitter4testing(true);
MDBX_INTERNAL_FUNC int osal_rdt_lock(MDBX_env *env) {
TRACE("%s", ">>");
jitter4testing(true);
int rc = mdbx_ipclock_lock(env, &env->me_lck->mti_rlock, false);
mdbx_trace("<< rc %d", rc);
TRACE("<< rc %d", rc);
return rc;
}
MDBX_INTERNAL_FUNC void mdbx_rdt_unlock(MDBX_env *env) {
mdbx_trace("%s", ">>");
MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env) {
TRACE("%s", ">>");
int rc = mdbx_ipclock_unlock(env, &env->me_lck->mti_rlock);
mdbx_trace("<< rc %d", rc);
TRACE("<< rc %d", rc);
if (unlikely(rc != MDBX_SUCCESS))
mdbx_panic("%s() failed: err %d\n", __func__, rc);
mdbx_jitter4testing(true);
jitter4testing(true);
}
int mdbx_txn_lock(MDBX_env *env, bool dont_wait) {
mdbx_trace("%swait %s", dont_wait ? "dont-" : "", ">>");
mdbx_jitter4testing(true);
TRACE("%swait %s", dont_wait ? "dont-" : "", ">>");
jitter4testing(true);
int rc = mdbx_ipclock_lock(env, &env->me_lck->mti_wlock, dont_wait);
mdbx_trace("<< rc %d", rc);
TRACE("<< rc %d", rc);
return MDBX_IS_ERROR(rc) ? rc : MDBX_SUCCESS;
}
void mdbx_txn_unlock(MDBX_env *env) {
mdbx_trace("%s", ">>");
TRACE("%s", ">>");
int rc = mdbx_ipclock_unlock(env, &env->me_lck->mti_wlock);
mdbx_trace("<< rc %d", rc);
TRACE("<< rc %d", rc);
if (unlikely(rc != MDBX_SUCCESS))
mdbx_panic("%s() failed: err %d\n", __func__, rc);
mdbx_jitter4testing(true);
jitter4testing(true);
}
#else

View File

@ -1,5 +1,5 @@
/*
* Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -48,16 +48,16 @@ static
switch (reason) {
case DLL_PROCESS_ATTACH:
mdbx_winnt_import();
mdbx_rthc_global_init();
global_ctor();
break;
case DLL_PROCESS_DETACH:
mdbx_rthc_global_dtor();
global_dtor();
break;
case DLL_THREAD_ATTACH:
break;
case DLL_THREAD_DETACH:
mdbx_rthc_thread_dtor(module);
thread_dtor(module);
break;
}
#if MDBX_BUILD_SHARED_LIBRARY
@ -112,32 +112,71 @@ static
#define LCK_WAITFOR 0
#define LCK_DONTWAIT LOCKFILE_FAIL_IMMEDIATELY
static __inline BOOL flock(mdbx_filehandle_t fd, DWORD flags, uint64_t offset,
size_t bytes) {
static int flock_with_event(HANDLE fd, HANDLE event, unsigned flags,
size_t offset, size_t bytes) {
TRACE("lock>>: fd %p, event %p, flags 0x%x offset %zu, bytes %zu >>", fd,
event, flags, offset, bytes);
OVERLAPPED ov;
ov.hEvent = 0;
ov.Internal = 0;
ov.InternalHigh = 0;
ov.hEvent = event;
ov.Offset = (DWORD)offset;
ov.OffsetHigh = HIGH_DWORD(offset);
return LockFileEx(fd, flags, 0, (DWORD)bytes, HIGH_DWORD(bytes), &ov);
if (LockFileEx(fd, flags, 0, (DWORD)bytes, HIGH_DWORD(bytes), &ov)) {
TRACE("lock<<: fd %p, event %p, flags 0x%x offset %zu, bytes %zu << %s", fd,
event, flags, offset, bytes, "done");
return MDBX_SUCCESS;
}
DWORD rc = GetLastError();
if (rc == ERROR_IO_PENDING) {
if (event) {
if (GetOverlappedResult(fd, &ov, &rc, true)) {
TRACE("lock<<: fd %p, event %p, flags 0x%x offset %zu, bytes %zu << %s",
fd, event, flags, offset, bytes, "overlapped-done");
return MDBX_SUCCESS;
}
rc = GetLastError();
} else
CancelIo(fd);
}
TRACE("lock<<: fd %p, event %p, flags 0x%x offset %zu, bytes %zu << err %d",
fd, event, flags, offset, bytes, (int)rc);
return (int)rc;
}
static __inline BOOL funlock(mdbx_filehandle_t fd, uint64_t offset,
/* Convenience wrapper over flock_with_event() that supplies a zero event
 * handle. The value produced by flock_with_event() is returned unchanged. */
static __inline int flock(HANDLE fd, unsigned flags, size_t offset,
                          size_t bytes) {
  const HANDLE without_event = 0;
  return flock_with_event(fd, without_event, flags, offset, bytes);
}
/* Locks a byte range via flock_with_event(), choosing the handle from `env`:
 * env->me_overlapped_fd when it is non-zero, otherwise env->me_lazy_fd.
 * env->me_data_lock_event is passed as the event; the result of
 * flock_with_event() is returned unchanged. */
static __inline int flock_data(const MDBX_env *env, unsigned flags,
                               size_t offset, size_t bytes) {
  HANDLE fd4data = env->me_lazy_fd;
  if (env->me_overlapped_fd)
    fd4data = env->me_overlapped_fd;
  return flock_with_event(fd4data, env->me_data_lock_event, flags, offset,
                          bytes);
}
static int funlock(mdbx_filehandle_t fd, size_t offset, size_t bytes) {
TRACE("unlock: fd %p, offset %zu, bytes %zu", fd, offset, bytes);
return UnlockFile(fd, (DWORD)offset, HIGH_DWORD(offset), (DWORD)bytes,
HIGH_DWORD(bytes));
HIGH_DWORD(bytes))
? MDBX_SUCCESS
: (int)GetLastError();
}
/*----------------------------------------------------------------------------*/
/* global `write` lock for write-txn processing
* (exclusive locking of both meta-pages) */
#define LCK_MAXLEN (1u + ((~(size_t)0) >> 1))
#define LCK_META_OFFSET 0
#define LCK_META_LEN (MAX_PAGESIZE * NUM_METAS)
#define LCK_BODY_OFFSET LCK_META_LEN
#define LCK_BODY_LEN (LCK_MAXLEN - LCK_BODY_OFFSET)
#define LCK_BODY LCK_BODY_OFFSET, LCK_BODY_LEN
#define LCK_WHOLE 0, LCK_MAXLEN
/* DXB_MAXLEN: the length value used by the DXB_* lock ranges below;
 * a 64-bit constant on _WIN64 builds and a 32-bit constant otherwise. */
#ifdef _WIN64
#define DXB_MAXLEN UINT64_C(0x7fffFFFFfff00000)
#else
#define DXB_MAXLEN UINT32_C(0x7ff00000)
#endif
/* DXB_BODY expands to TWO arguments (offset, length): the offset is
 * NUM_METAS pages of env->me_psize bytes, the length is DXB_MAXLEN.
 * NOTE: the expansion references a variable named `env`, which therefore
 * must be in scope wherever this macro is used. */
#define DXB_BODY (env->me_psize * (size_t)NUM_METAS), DXB_MAXLEN
/* DXB_WHOLE expands to TWO arguments (offset, length): offset 0 and
 * length DXB_MAXLEN. */
#define DXB_WHOLE 0, DXB_MAXLEN
int mdbx_txn_lock(MDBX_env *env, bool dontwait) {
if (dontwait) {
@ -155,38 +194,49 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) {
}
}
if ((env->me_flags & MDBX_EXCLUSIVE) ||
flock(env->me_lazy_fd,
if (env->me_flags & MDBX_EXCLUSIVE) {
/* Zap: Failing to release lock 'env->me_windowsbug_lock'
* in function 'mdbx_txn_lock' */
MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(26115);
return MDBX_SUCCESS;
}
const HANDLE fd4data =
env->me_overlapped_fd ? env->me_overlapped_fd : env->me_lazy_fd;
int rc = flock_with_event(fd4data, env->me_data_lock_event,
dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT)
: (LCK_EXCLUSIVE | LCK_WAITFOR),
LCK_BODY))
return MDBX_SUCCESS;
int rc = (int)GetLastError();
DXB_BODY);
if (rc == ERROR_LOCK_VIOLATION && dontwait) {
SleepEx(0, true);
if (flock(env->me_lazy_fd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_BODY))
return MDBX_SUCCESS;
rc = (int)GetLastError();
rc = flock_with_event(fd4data, env->me_data_lock_event,
LCK_EXCLUSIVE | LCK_DONTWAIT, DXB_BODY);
if (rc == ERROR_LOCK_VIOLATION) {
SleepEx(0, true);
if (flock(env->me_lazy_fd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_BODY))
return MDBX_SUCCESS;
rc = (int)GetLastError();
rc = flock_with_event(fd4data, env->me_data_lock_event,
LCK_EXCLUSIVE | LCK_DONTWAIT, DXB_BODY);
}
}
if (rc == MDBX_SUCCESS) {
/* Zap: Failing to release lock 'env->me_windowsbug_lock'
* in function 'mdbx_txn_lock' */
MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(26115);
return rc;
}
LeaveCriticalSection(&env->me_windowsbug_lock);
return (!dontwait || rc != ERROR_LOCK_VIOLATION) ? rc : MDBX_BUSY;
}
void mdbx_txn_unlock(MDBX_env *env) {
int rc = (env->me_flags & MDBX_EXCLUSIVE)
? TRUE
: funlock(env->me_lazy_fd, LCK_BODY);
if ((env->me_flags & MDBX_EXCLUSIVE) == 0) {
const HANDLE fd4data =
env->me_overlapped_fd ? env->me_overlapped_fd : env->me_lazy_fd;
int err = funlock(fd4data, DXB_BODY);
if (err != MDBX_SUCCESS)
mdbx_panic("%s failed: err %u", __func__, err);
}
LeaveCriticalSection(&env->me_windowsbug_lock);
if (!rc)
mdbx_panic("%s failed: err %u", __func__, (int)GetLastError());
}
/*----------------------------------------------------------------------------*/
@ -200,56 +250,57 @@ void mdbx_txn_unlock(MDBX_env *env) {
#define LCK_LOWER LCK_LO_OFFSET, LCK_LO_LEN
#define LCK_UPPER LCK_UP_OFFSET, LCK_UP_LEN
MDBX_INTERNAL_FUNC int mdbx_rdt_lock(MDBX_env *env) {
mdbx_srwlock_AcquireShared(&env->me_remap_guard);
MDBX_INTERNAL_FUNC int osal_rdt_lock(MDBX_env *env) {
osal_srwlock_AcquireShared(&env->me_remap_guard);
if (env->me_lfd == INVALID_HANDLE_VALUE)
return MDBX_SUCCESS; /* readonly database in readonly filesystem */
/* transition from S-? (used) to S-E (locked),
* e.g. exclusive lock upper-part */
if ((env->me_flags & MDBX_EXCLUSIVE) ||
flock(env->me_lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER))
if (env->me_flags & MDBX_EXCLUSIVE)
return MDBX_SUCCESS;
int rc = (int)GetLastError();
mdbx_srwlock_ReleaseShared(&env->me_remap_guard);
int rc = flock(env->me_lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER);
if (rc == MDBX_SUCCESS)
return MDBX_SUCCESS;
osal_srwlock_ReleaseShared(&env->me_remap_guard);
return rc;
}
MDBX_INTERNAL_FUNC void mdbx_rdt_unlock(MDBX_env *env) {
if (env->me_lfd != INVALID_HANDLE_VALUE) {
MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env) {
if (env->me_lfd != INVALID_HANDLE_VALUE &&
(env->me_flags & MDBX_EXCLUSIVE) == 0) {
/* transition from S-E (locked) to S-? (used), e.g. unlock upper-part */
if ((env->me_flags & MDBX_EXCLUSIVE) == 0 &&
!funlock(env->me_lfd, LCK_UPPER))
mdbx_panic("%s failed: err %u", __func__, (int)GetLastError());
int err = funlock(env->me_lfd, LCK_UPPER);
if (err != MDBX_SUCCESS)
mdbx_panic("%s failed: err %u", __func__, err);
}
mdbx_srwlock_ReleaseShared(&env->me_remap_guard);
osal_srwlock_ReleaseShared(&env->me_remap_guard);
}
MDBX_INTERNAL_FUNC int mdbx_lockfile(mdbx_filehandle_t fd, bool wait) {
return flock(fd,
wait ? LCK_EXCLUSIVE | LCK_WAITFOR
: LCK_EXCLUSIVE | LCK_DONTWAIT,
0, LCK_MAXLEN)
? MDBX_SUCCESS
: (int)GetLastError();
MDBX_INTERNAL_FUNC int osal_lockfile(mdbx_filehandle_t fd, bool wait) {
return flock(
fd, wait ? LCK_EXCLUSIVE | LCK_WAITFOR : LCK_EXCLUSIVE | LCK_DONTWAIT, 0,
DXB_MAXLEN);
}
static int suspend_and_append(mdbx_handle_array_t **array,
const DWORD ThreadId) {
const unsigned limit = (*array)->limit;
if ((*array)->count == limit) {
void *ptr = mdbx_realloc(
(limit > ARRAY_LENGTH((*array)->handles))
mdbx_handle_array_t *const ptr =
osal_realloc((limit > ARRAY_LENGTH((*array)->handles))
? *array
: /* don't free initial array on the stack */ NULL,
sizeof(mdbx_handle_array_t) +
sizeof(HANDLE) * (limit * 2 - ARRAY_LENGTH((*array)->handles)));
sizeof(HANDLE) * (limit * (size_t)2 -
ARRAY_LENGTH((*array)->handles)));
if (!ptr)
return MDBX_ENOMEM;
if (limit == ARRAY_LENGTH((*array)->handles))
memcpy(ptr, *array, sizeof(mdbx_handle_array_t));
*array = (mdbx_handle_array_t *)ptr;
*ptr = **array;
*array = ptr;
(*array)->limit = limit * 2;
}
@ -273,8 +324,8 @@ static int suspend_and_append(mdbx_handle_array_t **array,
}
MDBX_INTERNAL_FUNC int
mdbx_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) {
mdbx_assert(env, (env->me_flags & MDBX_NOTLS) == 0);
osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) {
eASSERT(env, (env->me_flags & MDBX_NOTLS) == 0);
const uintptr_t CurrentTid = GetCurrentThreadId();
int rc;
if (env->me_lck_mmap.lck) {
@ -296,7 +347,7 @@ mdbx_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) {
rc = suspend_and_append(array, (mdbx_tid_t)reader->mr_tid.weak);
if (rc != MDBX_SUCCESS) {
bailout_lck:
(void)mdbx_resume_threads_after_remap(*array);
(void)osal_resume_threads_after_remap(*array);
return rc;
}
}
@ -308,7 +359,7 @@ mdbx_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) {
} else {
/* Without LCK (i.e. read-only mode).
* Walk through a snapshot of all running threads */
mdbx_assert(env, env->me_flags & (MDBX_EXCLUSIVE | MDBX_RDONLY));
eASSERT(env, env->me_flags & (MDBX_EXCLUSIVE | MDBX_RDONLY));
const HANDLE hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0);
if (hSnapshot == INVALID_HANDLE_VALUE)
return (int)GetLastError();
@ -320,7 +371,7 @@ mdbx_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) {
rc = (int)GetLastError();
bailout_toolhelp:
CloseHandle(hSnapshot);
(void)mdbx_resume_threads_after_remap(*array);
(void)osal_resume_threads_after_remap(*array);
return rc;
}
@ -345,7 +396,7 @@ mdbx_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) {
}
MDBX_INTERNAL_FUNC int
mdbx_resume_threads_after_remap(mdbx_handle_array_t *array) {
osal_resume_threads_after_remap(mdbx_handle_array_t *array) {
int rc = MDBX_SUCCESS;
for (unsigned i = 0; i < array->count; ++i) {
const HANDLE hThread = array->handles[i];
@ -384,11 +435,11 @@ mdbx_resume_threads_after_remap(mdbx_handle_array_t *array) {
* E-S
* E-E = exclusive-write, i.e. exclusive due (re)initialization
*
* The mdbx_lck_seize() moves the locking-FSM from the initial free/unlocked
* The osal_lck_seize() moves the locking-FSM from the initial free/unlocked
* state to the "exclusive write" (and returns MDBX_RESULT_TRUE) if possible,
* or to the "used" (and returns MDBX_RESULT_FALSE).
*
* The mdbx_lck_downgrade() moves the locking-FSM from "exclusive write"
* The osal_lck_downgrade() moves the locking-FSM from "exclusive write"
* state to the "used" (i.e. shared) state.
*
* The mdbx_lck_upgrade() moves the locking-FSM from "used" (i.e. shared)
@ -400,40 +451,38 @@ static void lck_unlock(MDBX_env *env) {
if (env->me_lfd != INVALID_HANDLE_VALUE) {
/* double `unlock` for robustly remove overlapped shared/exclusive locks */
while (funlock(env->me_lfd, LCK_LOWER))
;
err = (int)GetLastError();
do
err = funlock(env->me_lfd, LCK_LOWER);
while (err == MDBX_SUCCESS);
assert(err == ERROR_NOT_LOCKED ||
(mdbx_RunningUnderWine() && err == ERROR_LOCK_VIOLATION));
(void)err;
SetLastError(ERROR_SUCCESS);
while (funlock(env->me_lfd, LCK_UPPER))
;
err = (int)GetLastError();
do
err = funlock(env->me_lfd, LCK_UPPER);
while (err == MDBX_SUCCESS);
assert(err == ERROR_NOT_LOCKED ||
(mdbx_RunningUnderWine() && err == ERROR_LOCK_VIOLATION));
(void)err;
SetLastError(ERROR_SUCCESS);
}
if (env->me_lazy_fd != INVALID_HANDLE_VALUE) {
const HANDLE fd4data =
env->me_overlapped_fd ? env->me_overlapped_fd : env->me_lazy_fd;
if (fd4data != INVALID_HANDLE_VALUE) {
/* explicitly unlock to avoid latency for other processes (windows kernel
* releases such locks via deferred queues) */
while (funlock(env->me_lazy_fd, LCK_BODY))
;
err = (int)GetLastError();
do
err = funlock(fd4data, DXB_BODY);
while (err == MDBX_SUCCESS);
assert(err == ERROR_NOT_LOCKED ||
(mdbx_RunningUnderWine() && err == ERROR_LOCK_VIOLATION));
(void)err;
SetLastError(ERROR_SUCCESS);
while (funlock(env->me_lazy_fd, LCK_WHOLE))
;
err = (int)GetLastError();
do
err = funlock(fd4data, DXB_WHOLE);
while (err == MDBX_SUCCESS);
assert(err == ERROR_NOT_LOCKED ||
(mdbx_RunningUnderWine() && err == ERROR_LOCK_VIOLATION));
(void)err;
SetLastError(ERROR_SUCCESS);
}
}
@ -442,56 +491,57 @@ static void lck_unlock(MDBX_env *env) {
* or as 'used' (S-? and returns MDBX_RESULT_FALSE).
* Otherwise returns an error. */
static int internal_seize_lck(HANDLE lfd) {
  /* Walks the lock-file FSM from ?-? (free) towards E-E (exclusive-write)
   * or S-? (used), using the LCK_UPPER and LCK_LOWER ranges of `lfd`.
   *
   * Returns:
   *   MDBX_RESULT_TRUE  - E-E (exclusive-write) was acquired;
   *   the result of the shared-lock attempt (MDBX_SUCCESS when S-? was
   *                       reached, otherwise its error code);
   *   an error code     - when a lock operation failed unexpectedly.
   *
   * A failure to release LCK_UPPER is treated as fatal via mdbx_panic(). */
  assert(lfd != INVALID_HANDLE_VALUE);

  /* 1) now on ?-? (free), get ?-E (middle) */
  jitter4testing(false);
  int rc = flock(lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER);
  if (rc != MDBX_SUCCESS) {
    /* 2) something went wrong, give up */
    ERROR("%s, err %u", "?-?(free) >> ?-E(middle)", rc);
    return rc;
  }

  /* 3) now on ?-E (middle), try E-E (exclusive-write) */
  jitter4testing(false);
  rc = flock(lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_LOWER);
  if (rc == MDBX_SUCCESS)
    return MDBX_RESULT_TRUE /* 4) got E-E (exclusive-write), done */;

  /* 5) still on ?-E (middle) */
  jitter4testing(false);
  if (rc != ERROR_SHARING_VIOLATION && rc != ERROR_LOCK_VIOLATION) {
    /* 6) something went wrong, give up.
     * Keep the unlock status in a separate variable: reusing `rc` here would
     * overwrite the lock failure with the (successful) unlock status and
     * report success to the caller while no lock is actually held. */
    const int unlock_err = funlock(lfd, LCK_UPPER);
    if (unlock_err != MDBX_SUCCESS)
      mdbx_panic("%s(%s) failed: err %u", __func__, "?-E(middle) >> ?-?(free)",
                 unlock_err);
    return rc;
  }

  /* 7) still on ?-E (middle), try S-E (locked) */
  jitter4testing(false);
  rc = flock(lfd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER);

  jitter4testing(false);
  if (rc != MDBX_SUCCESS)
    ERROR("%s, err %u", "?-E(middle) >> S-E(locked)", rc);

  /* 8) now on S-E (locked) or still on ?-E (middle),
   *    transition to S-? (used) or ?-? (free) */
  int err = funlock(lfd, LCK_UPPER);
  if (err != MDBX_SUCCESS)
    mdbx_panic("%s(%s) failed: err %u", __func__,
               "X-E(locked/middle) >> X-?(used/free)", err);

  /* 9) now on S-? (used, DONE) or ?-? (free, FAILURE) */
  return rc;
}
MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env) {
int rc;
assert(env->me_lazy_fd != INVALID_HANDLE_VALUE);
MDBX_INTERNAL_FUNC int osal_lck_seize(MDBX_env *env) {
const HANDLE fd4data =
env->me_overlapped_fd ? env->me_overlapped_fd : env->me_lazy_fd;
assert(fd4data != INVALID_HANDLE_VALUE);
if (env->me_flags & MDBX_EXCLUSIVE)
return MDBX_RESULT_TRUE /* nope since files were must be opened
non-shareable */
@ -499,17 +549,15 @@ MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env) {
if (env->me_lfd == INVALID_HANDLE_VALUE) {
/* LY: without-lck mode (e.g. on read-only filesystem) */
mdbx_jitter4testing(false);
if (!flock(env->me_lazy_fd, LCK_SHARED | LCK_DONTWAIT, LCK_WHOLE)) {
rc = (int)GetLastError();
mdbx_error("%s, err %u", "without-lck", rc);
jitter4testing(false);
int rc = flock_data(env, LCK_SHARED | LCK_DONTWAIT, DXB_WHOLE);
if (rc != MDBX_SUCCESS)
ERROR("%s, err %u", "without-lck", rc);
return rc;
}
return MDBX_RESULT_FALSE;
}
rc = internal_seize_lck(env->me_lfd);
mdbx_jitter4testing(false);
int rc = internal_seize_lck(env->me_lfd);
jitter4testing(false);
if (rc == MDBX_RESULT_TRUE && (env->me_flags & MDBX_RDONLY) == 0) {
/* Check that another process don't operates in without-lck mode.
* Doing such check by exclusive locking the body-part of db. Should be
@ -517,46 +565,52 @@ MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env) {
* - we need an exclusive lock for do so;
* - we can't lock meta-pages, otherwise other process could get an error
* while opening db in valid (non-conflict) mode. */
if (!flock(env->me_lazy_fd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_BODY)) {
rc = (int)GetLastError();
mdbx_error("%s, err %u", "lock-against-without-lck", rc);
mdbx_jitter4testing(false);
int err = flock_data(env, LCK_EXCLUSIVE | LCK_DONTWAIT, DXB_WHOLE);
if (err != MDBX_SUCCESS) {
ERROR("%s, err %u", "lock-against-without-lck", err);
jitter4testing(false);
lck_unlock(env);
} else {
mdbx_jitter4testing(false);
if (!funlock(env->me_lazy_fd, LCK_BODY))
mdbx_panic("%s(%s) failed: err %u", __func__,
"unlock-against-without-lck", (int)GetLastError());
return err;
}
jitter4testing(false);
err = funlock(fd4data, DXB_WHOLE);
if (err != MDBX_SUCCESS)
mdbx_panic("%s(%s) failed: err %u", __func__,
"unlock-against-without-lck", err);
}
return rc;
}
MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env) {
MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env) {
const HANDLE fd4data =
env->me_overlapped_fd ? env->me_overlapped_fd : env->me_lazy_fd;
/* Transition from exclusive-write state (E-E) to used (S-?) */
assert(env->me_lazy_fd != INVALID_HANDLE_VALUE);
assert(fd4data != INVALID_HANDLE_VALUE);
assert(env->me_lfd != INVALID_HANDLE_VALUE);
if (env->me_flags & MDBX_EXCLUSIVE)
return MDBX_SUCCESS /* nope since files were must be opened non-shareable */
;
/* 1) now at E-E (exclusive-write), transition to ?-E (middle) */
if (!funlock(env->me_lfd, LCK_LOWER))
int rc = funlock(env->me_lfd, LCK_LOWER);
if (rc != MDBX_SUCCESS)
mdbx_panic("%s(%s) failed: err %u", __func__,
"E-E(exclusive-write) >> ?-E(middle)", (int)GetLastError());
"E-E(exclusive-write) >> ?-E(middle)", rc);
/* 2) now at ?-E (middle), transition to S-E (locked) */
if (!flock(env->me_lfd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER)) {
int rc = (int)GetLastError() /* 3) something went wrong, give up */;
mdbx_error("%s, err %u", "?-E(middle) >> S-E(locked)", rc);
rc = flock(env->me_lfd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER);
if (rc != MDBX_SUCCESS) {
/* 3) something went wrong, give up */;
ERROR("%s, err %u", "?-E(middle) >> S-E(locked)", rc);
return rc;
}
/* 4) got S-E (locked), continue transition to S-? (used) */
if (!funlock(env->me_lfd, LCK_UPPER))
rc = funlock(env->me_lfd, LCK_UPPER);
if (rc != MDBX_SUCCESS)
mdbx_panic("%s(%s) failed: err %u", __func__, "S-E(locked) >> S-?(used)",
(int)GetLastError());
rc);
return MDBX_SUCCESS /* 5) now at S-? (used), done */;
}
@ -569,53 +623,72 @@ MDBX_INTERNAL_FUNC int mdbx_lck_upgrade(MDBX_env *env) {
return MDBX_SUCCESS /* nope since files were must be opened non-shareable */
;
int rc;
/* 1) now on S-? (used), try S-E (locked) */
mdbx_jitter4testing(false);
if (!flock(env->me_lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_UPPER)) {
rc = (int)GetLastError() /* 2) something went wrong, give up */;
mdbx_verbose("%s, err %u", "S-?(used) >> S-E(locked)", rc);
jitter4testing(false);
int rc = flock(env->me_lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_UPPER);
if (rc != MDBX_SUCCESS) {
/* 2) something went wrong, give up */;
VERBOSE("%s, err %u", "S-?(used) >> S-E(locked)", rc);
return rc;
}
/* 3) now on S-E (locked), transition to ?-E (middle) */
if (!funlock(env->me_lfd, LCK_LOWER))
rc = funlock(env->me_lfd, LCK_LOWER);
if (rc != MDBX_SUCCESS)
mdbx_panic("%s(%s) failed: err %u", __func__, "S-E(locked) >> ?-E(middle)",
(int)GetLastError());
rc);
/* 4) now on ?-E (middle), try E-E (exclusive-write) */
mdbx_jitter4testing(false);
if (!flock(env->me_lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_LOWER)) {
rc = (int)GetLastError() /* 5) something went wrong, give up */;
mdbx_verbose("%s, err %u", "?-E(middle) >> E-E(exclusive-write)", rc);
jitter4testing(false);
rc = flock(env->me_lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_LOWER);
if (rc != MDBX_SUCCESS) {
/* 5) something went wrong, give up */;
VERBOSE("%s, err %u", "?-E(middle) >> E-E(exclusive-write)", rc);
return rc;
}
return MDBX_SUCCESS /* 6) now at E-E (exclusive-write), done */;
}
MDBX_INTERNAL_FUNC int mdbx_lck_init(MDBX_env *env,
MDBX_INTERNAL_FUNC int osal_lck_init(MDBX_env *env,
MDBX_env *inprocess_neighbor,
int global_uniqueness_flag) {
(void)env;
(void)inprocess_neighbor;
(void)global_uniqueness_flag;
if (mdbx_SetFileIoOverlappedRange && !(env->me_flags & MDBX_RDONLY)) {
HANDLE token = INVALID_HANDLE_VALUE;
TOKEN_PRIVILEGES privileges;
privileges.PrivilegeCount = 1;
privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES,
&token) ||
!LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME,
&privileges.Privileges[0].Luid) ||
!AdjustTokenPrivileges(token, FALSE, &privileges, sizeof(privileges),
nullptr, nullptr) ||
GetLastError() != ERROR_SUCCESS)
mdbx_SetFileIoOverlappedRange = NULL;
if (token != INVALID_HANDLE_VALUE)
CloseHandle(token);
}
return MDBX_SUCCESS;
}
MDBX_INTERNAL_FUNC int mdbx_lck_destroy(MDBX_env *env,
MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env,
MDBX_env *inprocess_neighbor) {
/* LY: should unmap before releasing the locks to avoid race condition and
* STATUS_USER_MAPPED_FILE/ERROR_USER_MAPPED_FILE */
if (env->me_map)
mdbx_munmap(&env->me_dxb_mmap);
osal_munmap(&env->me_dxb_mmap);
if (env->me_lck_mmap.lck) {
const bool synced = env->me_lck_mmap.lck->mti_unsynced_pages.weak == 0;
mdbx_munmap(&env->me_lck_mmap);
osal_munmap(&env->me_lck_mmap);
if (synced && !inprocess_neighbor && env->me_lfd != INVALID_HANDLE_VALUE &&
mdbx_lck_upgrade(env) == MDBX_SUCCESS)
/* this will fail if LCK is used/mmapped by other process(es) */
mdbx_ftruncate(env->me_lfd, 0);
osal_ftruncate(env->me_lfd, 0);
}
lck_unlock(env);
return MDBX_SUCCESS;
@ -624,12 +697,12 @@ MDBX_INTERNAL_FUNC int mdbx_lck_destroy(MDBX_env *env,
/*----------------------------------------------------------------------------*/
/* reader checking (by pid) */
MDBX_INTERNAL_FUNC int mdbx_rpid_set(MDBX_env *env) {
MDBX_INTERNAL_FUNC int osal_rpid_set(MDBX_env *env) {
(void)env;
return MDBX_SUCCESS;
}
MDBX_INTERNAL_FUNC int mdbx_rpid_clear(MDBX_env *env) {
MDBX_INTERNAL_FUNC int osal_rpid_clear(MDBX_env *env) {
(void)env;
return MDBX_SUCCESS;
}
@ -640,7 +713,7 @@ MDBX_INTERNAL_FUNC int mdbx_rpid_clear(MDBX_env *env) {
* MDBX_RESULT_TRUE, if pid is live (unable to acquire lock)
* MDBX_RESULT_FALSE, if pid is dead (lock acquired)
* or otherwise the errcode. */
MDBX_INTERNAL_FUNC int mdbx_rpid_check(MDBX_env *env, uint32_t pid) {
MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid) {
(void)env;
HANDLE hProcess = OpenProcess(SYNCHRONIZE, FALSE, pid);
int rc;
@ -677,18 +750,18 @@ MDBX_INTERNAL_FUNC int mdbx_rpid_check(MDBX_env *env, uint32_t pid) {
// Stub for slim read-write lock
// Copyright (C) 1995-2002 Brad Wilson
static void WINAPI stub_srwlock_Init(MDBX_srwlock *srwl) {
static void WINAPI stub_srwlock_Init(osal_srwlock_t *srwl) {
srwl->readerCount = srwl->writerCount = 0;
}
static void WINAPI stub_srwlock_AcquireShared(MDBX_srwlock *srwl) {
static void WINAPI stub_srwlock_AcquireShared(osal_srwlock_t *srwl) {
while (true) {
assert(srwl->writerCount >= 0 && srwl->readerCount >= 0);
// If there's a writer already, spin without unnecessarily
// interlocking the CPUs
if (srwl->writerCount != 0) {
YieldProcessor();
SwitchToThread();
continue;
}
@ -702,23 +775,23 @@ static void WINAPI stub_srwlock_AcquireShared(MDBX_srwlock *srwl) {
// Remove from the readers list, spin, try again
_InterlockedDecrement(&srwl->readerCount);
YieldProcessor();
SwitchToThread();
}
}
static void WINAPI stub_srwlock_ReleaseShared(MDBX_srwlock *srwl) {
static void WINAPI stub_srwlock_ReleaseShared(osal_srwlock_t *srwl) {
assert(srwl->readerCount > 0);
_InterlockedDecrement(&srwl->readerCount);
}
static void WINAPI stub_srwlock_AcquireExclusive(MDBX_srwlock *srwl) {
static void WINAPI stub_srwlock_AcquireExclusive(osal_srwlock_t *srwl) {
while (true) {
assert(srwl->writerCount >= 0 && srwl->readerCount >= 0);
// If there's a writer already, spin without unnecessarily
// interlocking the CPUs
if (srwl->writerCount != 0) {
YieldProcessor();
SwitchToThread();
continue;
}
@ -733,11 +806,11 @@ static void WINAPI stub_srwlock_AcquireExclusive(MDBX_srwlock *srwl) {
// that we're the writer.
while (srwl->readerCount != 0) {
assert(srwl->writerCount >= 0 && srwl->readerCount >= 0);
YieldProcessor();
SwitchToThread();
}
}
static void WINAPI stub_srwlock_ReleaseExclusive(MDBX_srwlock *srwl) {
static void WINAPI stub_srwlock_ReleaseExclusive(osal_srwlock_t *srwl) {
assert(srwl->writerCount == 1 && srwl->readerCount >= 0);
srwl->writerCount = 0;
}
@ -753,9 +826,9 @@ static uint64_t WINAPI stub_GetTickCount64(void) {
/*----------------------------------------------------------------------------*/
#ifndef xMDBX_ALLOY
MDBX_srwlock_function mdbx_srwlock_Init, mdbx_srwlock_AcquireShared,
mdbx_srwlock_ReleaseShared, mdbx_srwlock_AcquireExclusive,
mdbx_srwlock_ReleaseExclusive;
osal_srwlock_t_function osal_srwlock_Init, osal_srwlock_AcquireShared,
osal_srwlock_ReleaseShared, osal_srwlock_AcquireExclusive,
osal_srwlock_ReleaseExclusive;
MDBX_NtExtendSection mdbx_NtExtendSection;
MDBX_GetFileInformationByHandleEx mdbx_GetFileInformationByHandleEx;
@ -766,6 +839,7 @@ MDBX_NtFsControlFile mdbx_NtFsControlFile;
MDBX_PrefetchVirtualMemory mdbx_PrefetchVirtualMemory;
MDBX_GetTickCount64 mdbx_GetTickCount64;
MDBX_RegGetValueA mdbx_RegGetValueA;
MDBX_SetFileIoOverlappedRange mdbx_SetFileIoOverlappedRange;
#endif /* xMDBX_ALLOY */
#if __GNUC_PREREQ(8, 0)
@ -774,11 +848,11 @@ MDBX_RegGetValueA mdbx_RegGetValueA;
#endif /* GCC/MINGW */
static void mdbx_winnt_import(void) {
const HINSTANCE hNtdll = GetModuleHandleA("ntdll.dll");
#define GET_PROC_ADDR(dll, ENTRY) \
mdbx_##ENTRY = (MDBX_##ENTRY)GetProcAddress(dll, #ENTRY)
const HINSTANCE hNtdll = GetModuleHandleA("ntdll.dll");
if (hNtdll) {
if (GetProcAddress(hNtdll, "wine_get_version")) {
assert(mdbx_RunningUnderWine());
} else {
@ -786,8 +860,10 @@ static void mdbx_winnt_import(void) {
GET_PROC_ADDR(hNtdll, NtExtendSection);
assert(!mdbx_RunningUnderWine());
}
}
const HINSTANCE hKernel32dll = GetModuleHandleA("kernel32.dll");
if (hKernel32dll) {
GET_PROC_ADDR(hKernel32dll, GetFileInformationByHandleEx);
GET_PROC_ADDR(hKernel32dll, GetTickCount64);
if (!mdbx_GetTickCount64)
@ -797,31 +873,38 @@ static void mdbx_winnt_import(void) {
GET_PROC_ADDR(hKernel32dll, GetVolumeInformationByHandleW);
GET_PROC_ADDR(hKernel32dll, GetFinalPathNameByHandleW);
GET_PROC_ADDR(hKernel32dll, PrefetchVirtualMemory);
GET_PROC_ADDR(hKernel32dll, SetFileIoOverlappedRange);
}
}
const osal_srwlock_t_function init =
(osal_srwlock_t_function)(hKernel32dll
? GetProcAddress(hKernel32dll,
"InitializeSRWLock")
: nullptr);
if (init != NULL) {
osal_srwlock_Init = init;
osal_srwlock_AcquireShared = (osal_srwlock_t_function)GetProcAddress(
hKernel32dll, "AcquireSRWLockShared");
osal_srwlock_ReleaseShared = (osal_srwlock_t_function)GetProcAddress(
hKernel32dll, "ReleaseSRWLockShared");
osal_srwlock_AcquireExclusive = (osal_srwlock_t_function)GetProcAddress(
hKernel32dll, "AcquireSRWLockExclusive");
osal_srwlock_ReleaseExclusive = (osal_srwlock_t_function)GetProcAddress(
hKernel32dll, "ReleaseSRWLockExclusive");
} else {
osal_srwlock_Init = stub_srwlock_Init;
osal_srwlock_AcquireShared = stub_srwlock_AcquireShared;
osal_srwlock_ReleaseShared = stub_srwlock_ReleaseShared;
osal_srwlock_AcquireExclusive = stub_srwlock_AcquireExclusive;
osal_srwlock_ReleaseExclusive = stub_srwlock_ReleaseExclusive;
}
const HINSTANCE hAdvapi32dll = GetModuleHandleA("advapi32.dll");
if (hAdvapi32dll) {
GET_PROC_ADDR(hAdvapi32dll, RegGetValueA);
#undef GET_PROC_ADDR
const MDBX_srwlock_function init =
(MDBX_srwlock_function)GetProcAddress(hKernel32dll, "InitializeSRWLock");
if (init != NULL) {
mdbx_srwlock_Init = init;
mdbx_srwlock_AcquireShared = (MDBX_srwlock_function)GetProcAddress(
hKernel32dll, "AcquireSRWLockShared");
mdbx_srwlock_ReleaseShared = (MDBX_srwlock_function)GetProcAddress(
hKernel32dll, "ReleaseSRWLockShared");
mdbx_srwlock_AcquireExclusive = (MDBX_srwlock_function)GetProcAddress(
hKernel32dll, "AcquireSRWLockExclusive");
mdbx_srwlock_ReleaseExclusive = (MDBX_srwlock_function)GetProcAddress(
hKernel32dll, "ReleaseSRWLockExclusive");
} else {
mdbx_srwlock_Init = stub_srwlock_Init;
mdbx_srwlock_AcquireShared = stub_srwlock_AcquireShared;
mdbx_srwlock_ReleaseShared = stub_srwlock_ReleaseShared;
mdbx_srwlock_AcquireExclusive = stub_srwlock_AcquireExclusive;
mdbx_srwlock_ReleaseExclusive = stub_srwlock_ReleaseExclusive;
}
#undef GET_PROC_ADDR
}
#if __GNUC_PREREQ(8, 0)

View File

@ -1,6 +1,6 @@
.\" Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>.
.\" Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>.
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
.TH MDBX_CHK 1 "2023-02-14" "MDBX 0.11.14"
.TH MDBX_CHK 1 "2023-03-03" "MDBX 0.12.4"
.SH NAME
mdbx_chk \- MDBX checking tool
.SH SYNOPSIS
@ -81,6 +81,13 @@ Turn to a specified meta-page on successful check.
.BR \-T
Turn to a specified meta-page EVEN ON UNSUCCESSFUL CHECK!
.TP
.BR \-u
Warms up the DB before checking by notifying the OS kernel of subsequent access to the database pages.
.TP
.BR \-U
Warms up the DB before checking: notifies the OS kernel of subsequent access to the database pages,
then forcibly loads them by sequential access and tries to lock the database pages in memory.
.TP
.BR \-n
Open MDBX environment(s) which do not use subdirectories.
This is a legacy option. MDBX now handles this automatically.

View File

@ -1,8 +1,8 @@
.\" Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>.
.\" Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>.
.\" Copyright 2012-2015 Howard Chu, Symas Corp. All Rights Reserved.
.\" Copyright 2015,2016 Peter-Service R&D LLC <http://billing.ru/>.
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
.TH MDBX_COPY 1 "2023-02-14" "MDBX 0.11.14"
.TH MDBX_COPY 1 "2023-03-03" "MDBX 0.12.4"
.SH NAME
mdbx_copy \- MDBX environment copy tool
.SH SYNOPSIS
@ -45,6 +45,13 @@ or unused pages will be omitted from the copy. This option will
slow down the backup process as it is more CPU-intensive.
Currently it fails if the environment has suffered a page leak.
.TP
.BR \-u
Warms up the DB before copying by notifying the OS kernel of subsequent access to the database pages.
.TP
.BR \-U
Warms up the DB before copying: notifies the OS kernel of subsequent access to the database pages,
then forcibly loads them by sequential access and tries to lock the database pages in memory.
.TP
.BR \-n
Open MDBX environment(s) which do not use subdirectories.
This is a legacy option. MDBX now handles this automatically.

View File

@ -1,7 +1,7 @@
.\" Copyright 2021-2022 Leonid Yuriev <leo@yuriev.ru>.
.\" Copyright 2021-2023 Leonid Yuriev <leo@yuriev.ru>.
.\" Copyright 2014-2021 Howard Chu, Symas Corp. All Rights Reserved.
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
.TH MDBX_DROP 1 "2023-02-14" "MDBX 0.11.14"
.TH MDBX_DROP 1 "2023-03-03" "MDBX 0.12.4"
.SH NAME
mdbx_drop \- MDBX database delete tool
.SH SYNOPSIS

View File

@ -1,8 +1,8 @@
.\" Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>.
.\" Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>.
.\" Copyright 2014-2015 Howard Chu, Symas Corp. All Rights Reserved.
.\" Copyright 2015,2016 Peter-Service R&D LLC <http://billing.ru/>.
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
.TH MDBX_DUMP 1 "2023-02-14" "MDBX 0.11.14"
.TH MDBX_DUMP 1 "2023-03-03" "MDBX 0.12.4"
.SH NAME
mdbx_dump \- MDBX environment export tool
.SH SYNOPSIS
@ -66,6 +66,13 @@ Dump a specific subdatabase. If no database is specified, only the main database
.BR \-r
Rescue mode. Ignore some errors to dump a corrupted DB.
.TP
.BR \-u
Warms up the DB before dumping by notifying the OS kernel of subsequent access to the database pages.
.TP
.BR \-U
Warms up the DB before dumping: notifies the OS kernel of subsequent access to the database pages,
then forcibly loads them by sequential access and tries to lock the database pages in memory.
.TP
.BR \-n
Dump an MDBX database which does not use subdirectories.
This is a legacy option. MDBX now handles this automatically.

View File

@ -1,8 +1,8 @@
.\" Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>.
.\" Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>.
.\" Copyright 2014-2015 Howard Chu, Symas Corp. All Rights Reserved.
.\" Copyright 2015,2016 Peter-Service R&D LLC <http://billing.ru/>.
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
.TH MDBX_LOAD 1 "2023-02-14" "MDBX 0.11.14"
.TH MDBX_LOAD 1 "2023-03-03" "MDBX 0.12.4"
.SH NAME
mdbx_load \- MDBX environment import tool
.SH SYNOPSIS

View File

@ -1,8 +1,8 @@
.\" Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>.
.\" Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>.
.\" Copyright 2012-2015 Howard Chu, Symas Corp. All Rights Reserved.
.\" Copyright 2015,2016 Peter-Service R&D LLC <http://billing.ru/>.
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
.TH MDBX_STAT 1 "2023-02-14" "MDBX 0.11.14"
.TH MDBX_STAT 1 "2023-03-03" "MDBX 0.12.4"
.SH NAME
mdbx_stat \- MDBX environment status tool
.SH SYNOPSIS

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 2020-2022, Leonid Yuriev <leo@yuriev.ru>.
// Copyright (c) 2020-2023, Leonid Yuriev <leo@yuriev.ru>.
// SPDX-License-Identifier: Apache-2.0
//
// Non-inline part of the libmdbx C++ API
@ -14,6 +14,12 @@
#define __USE_MINGW_ANSI_STDIO 1
#endif /* MinGW */
/* Workaround for MSVC' header `extern "C"` vs `std::` redefinition bug */
#if defined(_MSC_VER) && defined(__SANITIZE_ADDRESS__) && \
!defined(_DISABLE_VECTOR_ANNOTATION)
#define _DISABLE_VECTOR_ANNOTATION
#endif /* _DISABLE_VECTOR_ANNOTATION */
#include "../mdbx.h++"
#include "internals.h"
@ -201,41 +207,6 @@ __cold bug::~bug() noexcept {}
#endif /* Unused*/
//------------------------------------------------------------------------------
#if defined(_WIN32) || defined(_WIN64)
std::string w2mb(const std::wstring &in) {
std::string out;
if (!in.empty()) {
const auto out_len = mdbx_w2mb(nullptr, 0, in.data(), in.size());
if (out_len < 1)
mdbx::error::throw_exception(GetLastError());
out.append(out_len, '\0');
if (out_len != mdbx_w2mb(const_cast<char *>(out.data()), out_len, in.data(),
in.size()))
mdbx::error::throw_exception(GetLastError());
}
return out;
}
std::wstring mb2w(const char *in) {
std::wstring out;
if (in && *in) {
const auto in_len = strlen(in);
const auto out_len = mdbx_mb2w(nullptr, 0, in, in_len);
if (out_len < 1)
mdbx::error::throw_exception(GetLastError());
out.append(out_len, '\0');
if (out_len !=
mdbx_mb2w(const_cast<wchar_t *>(out.data()), out_len, in, in_len))
mdbx::error::throw_exception(GetLastError());
}
return out;
}
#endif /* Windows */
} // namespace
//------------------------------------------------------------------------------
@ -1240,6 +1211,23 @@ env &env::copy(const ::std::string &destination, bool compactify,
return copy(destination.c_str(), compactify, force_dynamic_size);
}
#if defined(_WIN32) || defined(_WIN64)
// Copies the environment to the wide-character path `destination` by calling
// mdbx_env_copyW(). `compactify` selects MDBX_CP_COMPACT and
// `force_dynamic_size` selects MDBX_CP_FORCE_DYNAMIC_SIZE; a non-success
// result is turned into an exception. Returns *this to allow chaining.
env &env::copy(const wchar_t *destination, bool compactify,
               bool force_dynamic_size) {
  const auto compaction = compactify ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS;
  const auto resizing =
      force_dynamic_size ? MDBX_CP_FORCE_DYNAMIC_SIZE : MDBX_CP_DEFAULTS;
  const int rc = ::mdbx_env_copyW(handle_, destination, compaction | resizing);
  error::success_or_throw(rc);
  return *this;
}
// Convenience overload for std::wstring: forwards destination.c_str() and the
// remaining arguments unchanged to the C-string overload of copy().
env &env::copy(const ::std::wstring &destination, bool compactify,
bool force_dynamic_size) {
return copy(destination.c_str(), compactify, force_dynamic_size);
}
#endif /* Windows */
#ifdef MDBX_STD_FILESYSTEM_PATH
env &env::copy(const MDBX_STD_FILESYSTEM_PATH &destination, bool compactify,
bool force_dynamic_size) {
@ -1247,20 +1235,15 @@ env &env::copy(const MDBX_STD_FILESYSTEM_PATH &destination, bool compactify,
}
#endif /* MDBX_STD_FILESYSTEM_PATH */
#if defined(_WIN32) || defined(_WIN64)
env &env::copy(const ::std::wstring &destination, bool compactify,
bool force_dynamic_size) {
return copy(w2mb(destination), compactify, force_dynamic_size);
}
#endif /* Windows */
path env::get_path() const {
#if defined(_WIN32) || defined(_WIN64)
const wchar_t *c_wstr;
error::success_or_throw(::mdbx_env_get_pathW(handle_, &c_wstr));
static_assert(sizeof(path::value_type) == sizeof(wchar_t), "Oops");
return path(c_wstr);
#else
const char *c_str;
error::success_or_throw(::mdbx_env_get_path(handle_, &c_str));
#if defined(_WIN32) || defined(_WIN64)
static_assert(sizeof(path::value_type) == sizeof(wchar_t), "Oops");
return path(mb2w(c_str));
#else
static_assert(sizeof(path::value_type) == sizeof(char), "Oops");
return path(c_str);
#endif
@ -1275,6 +1258,17 @@ bool env::remove(const ::std::string &pathname, const remove_mode mode) {
return remove(pathname.c_str(), mode);
}
#if defined(_WIN32) || defined(_WIN64)
bool env::remove(const wchar_t *pathname, const remove_mode mode) {
return error::boolean_or_throw(
::mdbx_env_deleteW(pathname, MDBX_env_delete_mode_t(mode)));
}
// Convenience overload for std::wstring: forwards pathname.c_str() and `mode`
// to the C-string overload of remove().
bool env::remove(const ::std::wstring &pathname, const remove_mode mode) {
return remove(pathname.c_str(), mode);
}
#endif /* Windows */
#ifdef MDBX_STD_FILESYSTEM_PATH
bool env::remove(const MDBX_STD_FILESYSTEM_PATH &pathname,
const remove_mode mode) {
@ -1282,12 +1276,6 @@ bool env::remove(const MDBX_STD_FILESYSTEM_PATH &pathname,
}
#endif /* MDBX_STD_FILESYSTEM_PATH */
#if defined(_WIN32) || defined(_WIN64)
bool env::remove(const ::std::wstring &pathname, const remove_mode mode) {
return remove(w2mb(pathname), mode);
}
#endif /* Windows */
//------------------------------------------------------------------------------
static inline MDBX_env *create_env() {
@ -1360,6 +1348,44 @@ __cold env_managed::env_managed(const ::std::string &pathname,
const env::operate_parameters &op, bool accede)
: env_managed(pathname.c_str(), cp, op, accede) {}
#if defined(_WIN32) || defined(_WIN64)
// Opens an existing environment at the wide-character path `pathname`.
// A fresh handle is obtained from create_env(), limits are applied with
// setup(), and the environment is opened through mdbx_env_openW() with the
// flags produced by op.make_flags(accede) and a zero file mode.
// Throws MDBX_INCOMPATIBLE when nested write transactions were requested in
// `op` but the opened environment's options do not report them.
__cold env_managed::env_managed(const wchar_t *pathname,
                                const operate_parameters &op, bool accede)
    : env_managed(create_env()) {
  setup(op.max_maps, op.max_readers);

  const auto open_flags = op.make_flags(accede);
  const int rc = ::mdbx_env_openW(handle_, pathname, open_flags, 0);
  error::success_or_throw(rc);

  // get_options() is consulted only when nested transactions were asked for.
  const bool nested_requested = op.options.nested_write_transactions;
  if (nested_requested && !get_options().nested_write_transactions)
    MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_INCOMPATIBLE);
}
// Opens (creating if the flags allow) an environment at the wide-character
// path `pathname`. After setup() the geometry from `cp` is applied, then
// mdbx_env_openW() is called with op.make_flags(accede, cp.use_subdirectory)
// and cp.file_mode_bits.
// Throws MDBX_INCOMPATIBLE when nested write transactions were requested in
// `op` but the opened environment's options do not report them.
__cold env_managed::env_managed(const wchar_t *pathname,
                                const env_managed::create_parameters &cp,
                                const env::operate_parameters &op, bool accede)
    : env_managed(create_env()) {
  setup(op.max_maps, op.max_readers);
  set_geometry(cp.geometry);

  const auto open_flags = op.make_flags(accede, cp.use_subdirectory);
  const int rc =
      ::mdbx_env_openW(handle_, pathname, open_flags, cp.file_mode_bits);
  error::success_or_throw(rc);

  // get_options() is consulted only when nested transactions were asked for.
  const bool nested_requested = op.options.nested_write_transactions;
  if (nested_requested && !get_options().nested_write_transactions)
    MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_INCOMPATIBLE);
}
// std::wstring convenience constructor: delegates to the C-string constructor
// with pathname.c_str(), passing `op` and `accede` through unchanged.
__cold env_managed::env_managed(const ::std::wstring &pathname,
const operate_parameters &op, bool accede)
: env_managed(pathname.c_str(), op, accede) {}
// std::wstring convenience constructor with creation parameters: delegates to
// the C-string constructor with pathname.c_str(), passing `cp`, `op` and
// `accede` through unchanged.
__cold env_managed::env_managed(const ::std::wstring &pathname,
const env_managed::create_parameters &cp,
const env::operate_parameters &op, bool accede)
: env_managed(pathname.c_str(), cp, op, accede) {}
#endif /* Windows */
#ifdef MDBX_STD_FILESYSTEM_PATH
__cold env_managed::env_managed(const MDBX_STD_FILESYSTEM_PATH &pathname,
const operate_parameters &op, bool accede)
@ -1371,17 +1397,6 @@ __cold env_managed::env_managed(const MDBX_STD_FILESYSTEM_PATH &pathname,
: env_managed(pathname.native(), cp, op, accede) {}
#endif /* MDBX_STD_FILESYSTEM_PATH */
#if defined(_WIN32) || defined(_WIN64)
__cold env_managed::env_managed(const ::std::wstring &pathname,
const operate_parameters &op, bool accede)
: env_managed(w2mb(pathname), op, accede) {}
__cold env_managed::env_managed(const ::std::wstring &pathname,
const env_managed::create_parameters &cp,
const env::operate_parameters &op, bool accede)
: env_managed(w2mb(pathname), cp, op, accede) {}
#endif /* Windows */
//------------------------------------------------------------------------------
txn_managed txn::start_nested() {
@ -1415,6 +1430,15 @@ void txn_managed::commit() {
MDBX_CXX20_UNLIKELY err.throw_exception();
}
// Commits the transaction via mdbx_txn_commit_ex(), handing `latency` to the
// C API. The stored handle is cleared for every outcome except
// MDBX_THREAD_MISMATCH; any result other than MDBX_SUCCESS is thrown as an
// exception.
void txn_managed::commit(commit_latency *latency) {
  const int rc = ::mdbx_txn_commit_ex(handle_, latency);
  const error outcome = static_cast<MDBX_error_t>(rc);

  const bool handle_consumed = outcome.code() != MDBX_THREAD_MISMATCH;
  if (MDBX_LIKELY(handle_consumed))
    MDBX_CXX20_LIKELY handle_ = nullptr;

  const bool failed = outcome.code() != MDBX_SUCCESS;
  if (MDBX_UNLIKELY(failed))
    MDBX_CXX20_UNLIKELY outcome.throw_exception();
}
//------------------------------------------------------------------------------
bool txn::drop_map(const char *name, bool throw_if_absent) {

View File

@ -1,7 +1,7 @@
/* mdbx_chk.c - memory-mapped database check tool */
/*
* Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -20,9 +20,11 @@
#pragma warning(disable : 4996) /* The POSIX name is deprecated... */
#endif /* _MSC_VER (warnings) */
#define xMDBX_TOOLS /* Avoid using internal mdbx_assert() */
#define xMDBX_TOOLS /* Avoid using internal eASSERT() */
#include "internals.h"
#include <ctype.h>
typedef struct flagbit {
int bit;
const char *name;
@ -71,7 +73,7 @@ static void signal_handler(int sig) {
#define EXIT_FAILURE_CHECK_MINOR EXIT_FAILURE
typedef struct {
const char *name;
MDBX_val name;
struct {
uint64_t branch, large_count, large_volume, leaf;
uint64_t subleaf_dupsort, leaf_dupfixed, subleaf_dupfixed;
@ -93,7 +95,7 @@ struct {
#define dbi_main walk.dbi[MAIN_DBI]
#define dbi_meta walk.dbi[CORE_DBS]
int envflags = MDBX_RDONLY | MDBX_EXCLUSIVE;
int envflags = MDBX_RDONLY | MDBX_EXCLUSIVE | MDBX_VALIDATION;
MDBX_env *env;
MDBX_txn *txn;
MDBX_envinfo envinfo;
@ -102,7 +104,7 @@ uint64_t total_unused_bytes, reclaimable_pages, gc_pages, alloc_pages,
unused_pages, backed_pages;
unsigned verbose;
bool ignore_wrong_order, quiet, dont_traversal;
const char *only_subdb;
MDBX_val only_subdb;
int stuck_meta = -1;
struct problem {
@ -125,7 +127,99 @@ static void MDBX_PRINTF_ARGS(1, 2) print(const char *msg, ...) {
}
}
static void va_log(MDBX_log_level_t level, const char *msg, va_list args) {
static MDBX_val printable_buf;
/* Releases the memory held by printable_buf; sdb_name() registers this
 * function with atexit() the first time it allocates the buffer. */
static void free_printable_buf(void) { osal_free(printable_buf.iov_base); }
static const char *sdb_name(const MDBX_val *val) {
if (val == MDBX_PGWALK_MAIN)
return "@MAIN";
if (val == MDBX_PGWALK_GC)
return "@GC";
if (val == MDBX_PGWALK_META)
return "@META";
const unsigned char *const data = val->iov_base;
const size_t len = val->iov_len;
if (data == MDBX_PGWALK_MAIN)
return "@MAIN";
if (data == MDBX_PGWALK_GC)
return "@GC";
if (data == MDBX_PGWALK_META)
return "@META";
if (!len)
return "<zero-length>";
if (!data)
return "<nullptr>";
if (len > 65536) {
static char buf[64];
/* NOTE: There is MSYS2 MinGW bug if you here got
* the "unknown conversion type character z in format [-Werror=format=]"
* https://stackoverflow.com/questions/74504432/whats-the-proper-way-to-tell-mingw-based-gcc-to-use-ansi-stdio-output-on-windo
*/
snprintf(buf, sizeof(buf), "<too-long-%zu>", len);
return buf;
}
bool printable = true;
bool quoting = false;
size_t xchars = 0;
for (size_t i = 0; i < val->iov_len && printable; ++i) {
quoting |= data[i] != '_' && isalnum(data[i]) == 0;
printable = isprint(data[i]) != 0 ||
(data[i] < ' ' && ++xchars < 4 && len > xchars * 4);
}
size_t need = len + 1;
if (quoting || !printable)
need += len + /* quotes */ 2 + 2 * /* max xchars */ 4;
if (need > printable_buf.iov_len) {
void *ptr = osal_realloc(printable_buf.iov_base, need);
if (!ptr)
return "<out-of-memory>";
if (!printable_buf.iov_base)
atexit(free_printable_buf);
printable_buf.iov_base = ptr;
printable_buf.iov_len = need;
}
char *out = printable_buf.iov_base;
if (!quoting) {
memcpy(out, data, len);
out += len;
} else if (printable) {
*out++ = '\'';
for (size_t i = 0; i < len; ++i) {
if (data[i] < ' ') {
assert((char *)printable_buf.iov_base + printable_buf.iov_len >
out + 4);
static const char hex[] = "0123456789abcdef";
out[0] = '\\';
out[1] = 'x';
out[2] = hex[data[i] >> 4];
out[3] = hex[data[i] & 15];
out += 4;
} else if (strchr("\"'`\\", data[i])) {
assert((char *)printable_buf.iov_base + printable_buf.iov_len >
out + 2);
out[0] = '\\';
out[1] = data[i];
out += 2;
} else {
assert((char *)printable_buf.iov_base + printable_buf.iov_len >
out + 1);
*out++ = data[i];
}
}
*out++ = '\'';
}
assert((char *)printable_buf.iov_base + printable_buf.iov_len > out);
*out = 0;
return printable_buf.iov_base;
}
static void va_log(MDBX_log_level_t level, const char *function, int line,
const char *msg, va_list args) {
static const char *const prefixes[] = {
"!!!fatal: ", " ! " /* error */, " ~ " /* warning */,
" " /* notice */, " // " /* verbose */, " //// " /* debug */,
@ -143,13 +237,20 @@ static void va_log(MDBX_log_level_t level, const char *msg, va_list args) {
fflush(nullptr);
fputs(prefixes[level], out);
vfprintf(out, msg, args);
if (msg[strlen(msg) - 1] != '\n')
/* Check the length first so an empty format string does not make us read
 * the byte before the start of `msg`. */
const size_t msg_len = strlen(msg);
const bool have_lf = msg_len > 0 && msg[msg_len - 1] == '\n';
/* For fatal messages append the origin. strncmp() returns 0 on a match, so
 * the "mdbx_" prefix is skipped only when the function name really starts
 * with it; the previous test was inverted and chopped five characters off
 * every other name (possibly reading past a short one). */
if (level == MDBX_LOG_FATAL && function && line)
  fprintf(out, have_lf ? " %s(), %u\n" : " (%s:%u)\n",
          function + (strncmp(function, "mdbx_", 5) == 0 ? 5 : 0),
          (unsigned)line);
else if (!have_lf)
  fputc('\n', out);
fflush(nullptr);
}
if (level == MDBX_LOG_FATAL) {
#if !MDBX_DEBUG && !MDBX_FORCE_ASSERTIONS
exit(EXIT_FAILURE_MDBX);
#endif
abort();
}
}
@ -157,7 +258,7 @@ static void va_log(MDBX_log_level_t level, const char *msg, va_list args) {
static void MDBX_PRINTF_ARGS(1, 2) error(const char *msg, ...) {
va_list args;
va_start(args, msg);
va_log(MDBX_LOG_ERROR, msg, args);
va_log(MDBX_LOG_ERROR, nullptr, 0, msg, args);
va_end(args);
}
@ -166,7 +267,7 @@ static void logger(MDBX_log_level_t level, const char *function, int line,
(void)line;
(void)function;
if (level < MDBX_LOG_EXTRA)
va_log(level, msg, args);
va_log(level, function, line, msg, args);
}
static int check_user_break(void) {
@ -182,19 +283,17 @@ static int check_user_break(void) {
}
static void pagemap_cleanup(void) {
for (size_t i = CORE_DBS + /* account pseudo-entry for meta */ 1;
i < ARRAY_LENGTH(walk.dbi); ++i) {
if (walk.dbi[i].name) {
mdbx_free((void *)walk.dbi[i].name);
walk.dbi[i].name = nullptr;
}
}
mdbx_free(walk.pagemap);
osal_free(walk.pagemap);
walk.pagemap = nullptr;
}
static walk_dbi_t *pagemap_lookup_dbi(const char *dbi_name, bool silent) {
/* Tells whether two values hold the same bytes: lengths must match, and then
 * either both refer to the same memory, or both are empty, or their contents
 * compare equal with memcmp(). */
static bool eq(const MDBX_val a, const MDBX_val b) {
  if (a.iov_len != b.iov_len)
    return false;
  /* Same pointer or nothing to compare: no need to touch the memory. */
  if (a.iov_base == b.iov_base || a.iov_len == 0)
    return true;
  return memcmp(a.iov_base, b.iov_base, a.iov_len) == 0;
}
static walk_dbi_t *pagemap_lookup_dbi(const MDBX_val *dbi_name, bool silent) {
static walk_dbi_t *last;
if (dbi_name == MDBX_PGWALK_MAIN)
@ -204,24 +303,24 @@ static walk_dbi_t *pagemap_lookup_dbi(const char *dbi_name, bool silent) {
if (dbi_name == MDBX_PGWALK_META)
return &dbi_meta;
if (last && strcmp(last->name, dbi_name) == 0)
if (last && eq(last->name, *dbi_name))
return last;
walk_dbi_t *dbi = walk.dbi + CORE_DBS + /* account pseudo-entry for meta */ 1;
for (; dbi < ARRAY_END(walk.dbi) && dbi->name; ++dbi) {
if (strcmp(dbi->name, dbi_name) == 0)
for (; dbi < ARRAY_END(walk.dbi) && dbi->name.iov_base; ++dbi) {
if (eq(dbi->name, *dbi_name))
return last = dbi;
}
if (verbose > 0 && !silent) {
print(" - found '%s' area\n", dbi_name);
print(" - found %s area\n", sdb_name(dbi_name));
fflush(nullptr);
}
if (dbi == ARRAY_END(walk.dbi))
return nullptr;
dbi->name = mdbx_strdup(dbi_name);
dbi->name = *dbi_name;
return last = dbi;
}
@ -239,7 +338,7 @@ static void MDBX_PRINTF_ARGS(4, 5)
break;
if (!p) {
p = mdbx_calloc(1, sizeof(*p));
p = osal_calloc(1, sizeof(*p));
if (unlikely(!p))
return;
p->caption = msg;
@ -284,7 +383,7 @@ static size_t problems_pop(struct problem *list) {
count += problems_list->count;
print("%s%s (%" PRIuPTR ")", i ? ", " : "", problems_list->caption,
problems_list->count);
mdbx_free(problems_list);
osal_free(problems_list);
problems_list = p;
}
print("\n");
@ -296,13 +395,13 @@ static size_t problems_pop(struct problem *list) {
}
static int pgvisitor(const uint64_t pgno, const unsigned pgnumber,
void *const ctx, const int deep,
const char *const dbi_name_or_tag, const size_t page_size,
const MDBX_page_type_t pagetype, const MDBX_error_t err,
const size_t nentries, const size_t payload_bytes,
const size_t header_bytes, const size_t unused_bytes) {
void *const ctx, const int deep, const MDBX_val *dbi_name,
const size_t page_size, const MDBX_page_type_t pagetype,
const MDBX_error_t err, const size_t nentries,
const size_t payload_bytes, const size_t header_bytes,
const size_t unused_bytes) {
(void)ctx;
const bool is_gc_tree = dbi_name_or_tag == MDBX_PGWALK_GC;
const bool is_gc_tree = dbi_name == MDBX_PGWALK_GC;
if (deep > 42) {
problem_add("deep", deep, "too large", nullptr);
data_tree_problems += !is_gc_tree;
@ -310,7 +409,7 @@ static int pgvisitor(const uint64_t pgno, const unsigned pgnumber,
return MDBX_CORRUPTED /* avoid infinite loop/recursion */;
}
walk_dbi_t *dbi = pagemap_lookup_dbi(dbi_name_or_tag, false);
walk_dbi_t *dbi = pagemap_lookup_dbi(dbi_name, false);
if (!dbi) {
data_tree_problems += !is_gc_tree;
gc_tree_problems += is_gc_tree;
@ -375,14 +474,14 @@ static int pgvisitor(const uint64_t pgno, const unsigned pgnumber,
}
if (pgnumber) {
if (verbose > 3 && (!only_subdb || strcmp(only_subdb, dbi->name) == 0)) {
if (verbose > 3 && (!only_subdb.iov_base || eq(only_subdb, dbi->name))) {
if (pgnumber == 1)
print(" %s-page %" PRIu64, pagetype_caption, pgno);
else
print(" %s-span %" PRIu64 "[%u]", pagetype_caption, pgno, pgnumber);
print(" of %s: header %" PRIiPTR ", %s %" PRIiPTR ", payload %" PRIiPTR
", unused %" PRIiPTR ", deep %i\n",
dbi->name, header_bytes,
sdb_name(&dbi->name), header_bytes,
(pagetype == MDBX_page_branch) ? "keys" : "entries", nentries,
payload_bytes, unused_bytes, deep);
}
@ -400,8 +499,8 @@ static int pgvisitor(const uint64_t pgno, const unsigned pgnumber,
walk_dbi_t *coll_dbi = &walk.dbi[walk.pagemap[spanpgno] - 1];
problem_add("page", spanpgno,
(branch && coll_dbi == dbi) ? "loop" : "already used",
"%s-page: by %s, deep %i", pagetype_caption, coll_dbi->name,
deep);
"%s-page: by %s, deep %i", pagetype_caption,
sdb_name(&coll_dbi->name), deep);
already_used = true;
data_tree_problems += !is_gc_tree;
gc_tree_problems += is_gc_tree;
@ -472,8 +571,8 @@ static int pgvisitor(const uint64_t pgno, const unsigned pgnumber,
data_tree_problems += !is_gc_tree;
gc_tree_problems += is_gc_tree;
} else {
dbi->payload_bytes += payload_bytes + header_bytes;
walk.total_payload_bytes += payload_bytes + header_bytes;
dbi->payload_bytes += (uint64_t)payload_bytes + header_bytes;
walk.total_payload_bytes += (uint64_t)payload_bytes + header_bytes;
}
}
}
@ -483,8 +582,8 @@ static int pgvisitor(const uint64_t pgno, const unsigned pgnumber,
typedef int(visitor)(const uint64_t record_number, const MDBX_val *key,
const MDBX_val *data);
static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
bool silent);
static int process_db(MDBX_dbi dbi_handle, const MDBX_val *dbi_name,
visitor *handler);
static int handle_userdb(const uint64_t record_number, const MDBX_val *key,
const MDBX_val *data) {
@ -521,7 +620,7 @@ static int handle_freedb(const uint64_t record_number, const MDBX_val *key,
number = data->iov_len / sizeof(pgno_t) - 1;
} else if (data->iov_len - (number + 1) * sizeof(pgno_t) >=
/* LY: allow gap up to one page. it is ok
* and better than shink-and-retry inside mdbx_update_gc() */
* and better than shrink-and-retry inside update_gc() */
envinfo.mi_dxb_pagesize)
problem_add("entry", txnid, "extra idl space",
"%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)",
@ -533,7 +632,7 @@ static int handle_freedb(const uint64_t record_number, const MDBX_val *key,
pgno_t prev = MDBX_PNL_ASCENDING ? NUM_METAS - 1 : txn->mt_next_pgno;
pgno_t span = 1;
for (unsigned i = 0; i < number; ++i) {
for (size_t i = 0; i < number; ++i) {
if (check_user_break())
return MDBX_EINTR;
const pgno_t pgno = iptr[i];
@ -552,7 +651,7 @@ static int handle_freedb(const uint64_t record_number, const MDBX_val *key,
if (MDBX_PNL_DISORDERED(prev, pgno)) {
bad = " [bad sequence]";
problem_add("entry", txnid, "bad sequence",
"%" PRIaPGNO " %c [%u].%" PRIaPGNO, prev,
"%" PRIaPGNO " %c [%zu].%" PRIaPGNO, prev,
(prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? '>' : '<'),
i, pgno);
}
@ -562,7 +661,7 @@ static int handle_freedb(const uint64_t record_number, const MDBX_val *key,
walk.pagemap[pgno] = -1;
else if (idx > 0)
problem_add("page", pgno, "already used", "by %s",
walk.dbi[idx - 1].name);
sdb_name(&walk.dbi[idx - 1].name));
else
problem_add("page", pgno, "already listed in GC", nullptr);
}
@ -573,12 +672,12 @@ static int handle_freedb(const uint64_t record_number, const MDBX_val *key,
: pgno_sub(pgno, span)))
++span;
}
if (verbose > 3 && !only_subdb) {
if (verbose > 3 && !only_subdb.iov_base) {
print(" transaction %" PRIaTXN ", %" PRIuPTR
" pages, maxspan %" PRIaPGNO "%s\n",
txnid, number, span, bad);
if (verbose > 4) {
for (unsigned i = 0; i < number; i += span) {
for (size_t i = 0; i < number; i += span) {
const pgno_t pgno = iptr[i];
for (span = 1;
i + span < number &&
@ -600,36 +699,18 @@ static int handle_freedb(const uint64_t record_number, const MDBX_val *key,
}
static int equal_or_greater(const MDBX_val *a, const MDBX_val *b) {
return (a->iov_len == b->iov_len &&
memcmp(a->iov_base, b->iov_base, a->iov_len) == 0)
? 0
: 1;
return eq(*a, *b) ? 0 : 1;
}
static int handle_maindb(const uint64_t record_number, const MDBX_val *key,
const MDBX_val *data) {
char *name;
int rc;
size_t i;
name = key->iov_base;
for (i = 0; i < key->iov_len; ++i) {
if (name[i] < ' ')
return handle_userdb(record_number, key, data);
}
name = mdbx_malloc(key->iov_len + 1);
if (unlikely(!name))
return MDBX_ENOMEM;
memcpy(name, key->iov_base, key->iov_len);
name[key->iov_len] = '\0';
if (data->iov_len == sizeof(MDBX_db)) {
int rc = process_db(~0u, key, handle_userdb);
if (rc != MDBX_INCOMPATIBLE) {
userdb_count++;
rc = process_db(~0u, name, handle_userdb, false);
mdbx_free(name);
if (rc != MDBX_INCOMPATIBLE)
return rc;
}
}
return handle_userdb(record_number, key, data);
}
@ -683,8 +764,8 @@ static const char *db_flags2valuemode(unsigned flags) {
}
}
static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
bool silent) {
static int process_db(MDBX_dbi dbi_handle, const MDBX_val *dbi_name,
visitor *handler) {
MDBX_cursor *mc;
MDBX_stat ms;
MDBX_val key, data;
@ -693,18 +774,19 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
int rc, i;
struct problem *saved_list;
uint64_t problems_count;
const bool second_pass = dbi_handle == MAIN_DBI;
uint64_t record_count = 0, dups = 0;
uint64_t key_bytes = 0, data_bytes = 0;
if ((MDBX_TXN_FINISHED | MDBX_TXN_ERROR) & mdbx_txn_flags(txn)) {
print(" ! abort processing '%s' due to a previous error\n",
dbi_name ? dbi_name : "@MAIN");
print(" ! abort processing %s due to a previous error\n",
sdb_name(dbi_name));
return MDBX_BAD_TXN;
}
if (dbi_handle == ~0u) {
rc = mdbx_dbi_open_ex(
rc = mdbx_dbi_open_ex2(
txn, dbi_name, MDBX_DB_ACCEDE, &dbi_handle,
(dbi_name && ignore_wrong_order) ? equal_or_greater : nullptr,
(dbi_name && ignore_wrong_order) ? equal_or_greater : nullptr);
@ -712,27 +794,26 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
if (!dbi_name ||
rc !=
MDBX_INCOMPATIBLE) /* LY: mainDB's record is not a user's DB. */ {
error("mdbx_dbi_open('%s') failed, error %d %s\n",
dbi_name ? dbi_name : "main", rc, mdbx_strerror(rc));
error("mdbx_dbi_open(%s) failed, error %d %s\n", sdb_name(dbi_name), rc,
mdbx_strerror(rc));
}
return rc;
}
}
if (dbi_handle >= CORE_DBS && dbi_name && only_subdb &&
strcmp(only_subdb, dbi_name) != 0) {
if (dbi_handle >= CORE_DBS && dbi_name && only_subdb.iov_base &&
!eq(only_subdb, *dbi_name)) {
if (verbose) {
print("Skip processing '%s'...\n", dbi_name);
print("Skip processing %s...\n", sdb_name(dbi_name));
fflush(nullptr);
}
skipped_subdb++;
return MDBX_SUCCESS;
}
if (!silent && verbose) {
print("Processing '%s'...\n", dbi_name ? dbi_name : "@MAIN");
if (!second_pass && verbose)
print("Processing %s...\n", sdb_name(dbi_name));
fflush(nullptr);
}
rc = mdbx_dbi_flags(txn, dbi_handle, &flags);
if (rc) {
@ -746,7 +827,7 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
return rc;
}
if (!silent && verbose) {
if (!second_pass && verbose) {
print(" - key-value kind: %s-key => %s-value", db_flags2keymode(flags),
db_flags2valuemode(flags));
if (verbose > 1) {
@ -805,9 +886,9 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
}
if (ignore_wrong_order) { /* for debugging with enabled assertions */
mc->mc_flags |= C_SKIPORD;
mc->mc_checking |= CC_SKIPORD;
if (mc->mc_xcursor)
mc->mc_xcursor->mx_cursor.mc_flags |= C_SKIPORD;
mc->mc_xcursor->mx_cursor.mc_checking |= CC_SKIPORD;
}
const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, flags);
@ -822,6 +903,7 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
if (rc)
goto bailout;
if (!second_pass) {
bool bad_key = false;
if (key.iov_len > maxkeysize) {
problem_add("entry", record_count, "key length exceeds max-key-size",
@ -858,21 +940,38 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
if ((flags & MDBX_DUPSORT) == 0) {
problem_add("entry", record_count, "duplicated entries", nullptr);
if (prev_data.iov_base && data.iov_len == prev_data.iov_len &&
memcmp(data.iov_base, prev_data.iov_base, data.iov_len) == 0) {
problem_add("entry", record_count, "complete duplicate", nullptr);
memcmp(data.iov_base, prev_data.iov_base, data.iov_len) ==
0) {
problem_add("entry", record_count, "complete duplicate",
nullptr);
}
} else if (!bad_data && prev_data.iov_base) {
cmp = mdbx_dcmp(txn, dbi_handle, &data, &prev_data);
if (cmp == 0) {
problem_add("entry", record_count, "complete duplicate", nullptr);
problem_add("entry", record_count, "complete duplicate",
nullptr);
} else if (cmp < 0 && !ignore_wrong_order) {
problem_add("entry", record_count, "wrong order of multi-values",
problem_add("entry", record_count,
"wrong order of multi-values", nullptr);
}
}
} else if (cmp < 0 && !ignore_wrong_order) {
problem_add("entry", record_count, "wrong order of entries",
nullptr);
}
}
} else if (cmp < 0 && !ignore_wrong_order) {
problem_add("entry", record_count, "wrong order of entries", nullptr);
}
if (!bad_key) {
if (verbose && (flags & MDBX_INTEGERKEY) && !prev_key.iov_base)
print(" - fixed key-size %" PRIuPTR "\n", key.iov_len);
prev_key = key;
}
if (!bad_data) {
if (verbose && (flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) &&
!prev_data.iov_base)
print(" - fixed data-size %" PRIuPTR "\n", data.iov_len);
prev_data = data;
}
}
@ -886,17 +985,6 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
key_bytes += key.iov_len;
data_bytes += data.iov_len;
if (!bad_key) {
if (verbose && (flags & MDBX_INTEGERKEY) && !prev_key.iov_base)
print(" - fixed key-size %" PRIuPTR "\n", key.iov_len);
prev_key = key;
}
if (!bad_data) {
if (verbose && (flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) &&
!prev_data.iov_base)
print(" - fixed data-size %" PRIuPTR "\n", data.iov_len);
prev_data = data;
}
rc = mdbx_cursor_get(mc, &key, &data, MDBX_NEXT);
}
if (rc != MDBX_NOTFOUND)
@ -909,7 +997,7 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
"%" PRIu64 " != %" PRIu64, record_count, ms.ms_entries);
bailout:
problems_count = problems_pop(saved_list);
if (!silent && verbose) {
if (!second_pass && verbose) {
print(" - summary: %" PRIu64 " records, %" PRIu64 " dups, %" PRIu64
" key's bytes, %" PRIu64 " data's "
"bytes, %" PRIu64 " problems\n",
@ -922,9 +1010,10 @@ bailout:
}
static void usage(char *prog) {
fprintf(stderr,
"usage: %s [-V] [-v] [-q] [-c] [-0|1|2] [-w] [-d] [-i] [-s subdb] "
"dbpath\n"
fprintf(
stderr,
"usage: %s "
"[-V] [-v] [-q] [-c] [-0|1|2] [-w] [-d] [-i] [-s subdb] [-u|U] dbpath\n"
" -V\t\tprint version and exit\n"
" -v\t\tmore verbose, could be used multiple times\n"
" -q\t\tbe quiet\n"
@ -933,6 +1022,8 @@ static void usage(char *prog) {
" -d\t\tdisable page-by-page traversal of B-tree\n"
" -i\t\tignore wrong order errors (for custom comparators case)\n"
" -s subdb\tprocess a specific subdatabase only\n"
" -u\t\twarmup database before checking\n"
" -U\t\twarmup and try lock database pages in memory before checking\n"
" -0|1|2\tforce using specific meta-page 0, or 2 for checking\n"
" -t\t\tturn to a specified meta-page on successful check\n"
" -T\t\tturn to a specified meta-page EVEN ON UNSUCCESSFUL CHECK!\n",
@ -1075,6 +1166,8 @@ int main(int argc, char *argv[]) {
bool write_locked = false;
bool turn_meta = false;
bool force_turn_meta = false;
bool warmup = false;
MDBX_warmup_flags_t warmup_flags = MDBX_warmup_default;
double elapsed;
#if defined(_WIN32) || defined(_WIN64)
@ -1089,15 +1182,16 @@ int main(int argc, char *argv[]) {
}
#endif
dbi_meta.name = "@META";
dbi_free.name = "@GC";
dbi_main.name = "@MAIN";
dbi_meta.name.iov_base = MDBX_PGWALK_META;
dbi_free.name.iov_base = MDBX_PGWALK_GC;
dbi_main.name.iov_base = MDBX_PGWALK_MAIN;
atexit(pagemap_cleanup);
if (argc < 2)
usage(prog);
for (int i; (i = getopt(argc, argv,
"uU"
"0"
"1"
"2"
@ -1168,13 +1262,22 @@ int main(int argc, char *argv[]) {
dont_traversal = true;
break;
case 's':
if (only_subdb && strcmp(only_subdb, optarg))
if (only_subdb.iov_base && strcmp(only_subdb.iov_base, optarg))
usage(prog);
only_subdb = optarg;
only_subdb.iov_base = optarg;
only_subdb.iov_len = strlen(optarg);
break;
case 'i':
ignore_wrong_order = true;
break;
case 'u':
warmup = true;
break;
case 'U':
warmup = true;
warmup_flags =
MDBX_warmup_force | MDBX_warmup_touchlimit | MDBX_warmup_lock;
break;
default:
usage(prog);
}
@ -1200,8 +1303,9 @@ int main(int argc, char *argv[]) {
error("write-mode must be enabled to turn to the specified meta-page.\n");
rc = EXIT_INTERRUPTED;
}
if (only_subdb || dont_traversal) {
error("whole database checking with tree-traversal are required to turn "
if (only_subdb.iov_base || dont_traversal) {
error(
"whole database checking with b-tree traversal are required to turn "
"to the specified meta-page.\n");
rc = EXIT_INTERRUPTED;
}
@ -1231,7 +1335,9 @@ int main(int argc, char *argv[]) {
mdbx_setup_debug((verbose < MDBX_LOG_TRACE - 1)
? (MDBX_log_level_t)(verbose + 1)
: MDBX_LOG_TRACE,
MDBX_DBG_LEGACY_OVERLAP | MDBX_DBG_DONT_UPGRADE, logger);
MDBX_DBG_DUMP | MDBX_DBG_ASSERT | MDBX_DBG_AUDIT |
MDBX_DBG_LEGACY_OVERLAP | MDBX_DBG_DONT_UPGRADE,
logger);
rc = mdbx_env_create(&env);
if (rc) {
@ -1274,14 +1380,35 @@ int main(int argc, char *argv[]) {
(envflags & MDBX_EXCLUSIVE) ? "monopolistic" : "cooperative");
if ((envflags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0) {
if (verbose) {
print(" - taking write lock...");
fflush(nullptr);
}
rc = mdbx_txn_lock(env, false);
if (rc != MDBX_SUCCESS) {
error("mdbx_txn_lock() failed, error %d %s\n", rc, mdbx_strerror(rc));
goto bailout;
}
if (verbose)
print(" done\n");
write_locked = true;
}
if (warmup) {
if (verbose) {
print(" - warming up...");
fflush(nullptr);
}
rc = mdbx_env_warmup(env, nullptr, warmup_flags, 3600 * 65536);
if (MDBX_IS_ERROR(rc)) {
error("mdbx_env_warmup(flags %u) failed, error %d %s\n", warmup_flags, rc,
mdbx_strerror(rc));
goto bailout;
}
if (verbose)
print(" %s\n", rc ? "timeout" : "done");
}
rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn);
if (rc) {
error("mdbx_txn_begin() failed, error %d %s\n", rc, mdbx_strerror(rc));
@ -1330,7 +1457,7 @@ int main(int argc, char *argv[]) {
}
#endif
if (rc) {
error("mdbx_filesize() failed, error %d %s\n", rc, mdbx_strerror(rc));
error("osal_filesize() failed, error %d %s\n", rc, mdbx_strerror(rc));
goto bailout;
}
@ -1494,7 +1621,7 @@ int main(int argc, char *argv[]) {
print("Traversal b-tree by txn#%" PRIaTXN "...\n", txn->mt_txnid);
fflush(nullptr);
walk.pagemap = mdbx_calloc((size_t)backed_pages, sizeof(*walk.pagemap));
walk.pagemap = osal_calloc((size_t)backed_pages, sizeof(*walk.pagemap));
if (!walk.pagemap) {
rc = errno ? errno : MDBX_ENOMEM;
error("calloc() failed, error %d %s\n", rc, mdbx_strerror(rc));
@ -1518,8 +1645,8 @@ int main(int argc, char *argv[]) {
unused_pages += 1;
empty_pages = lost_bytes = 0;
for (walk_dbi_t *dbi = &dbi_main; dbi < ARRAY_END(walk.dbi) && dbi->name;
++dbi) {
for (walk_dbi_t *dbi = &dbi_main;
dbi < ARRAY_END(walk.dbi) && dbi->name.iov_base; ++dbi) {
empty_pages += dbi->pages.empty;
lost_bytes += dbi->lost_bytes;
}
@ -1529,9 +1656,10 @@ int main(int argc, char *argv[]) {
print(" - pages: walked %" PRIu64 ", left/unused %" PRIu64 "\n",
walk.pgcount, unused_pages);
if (verbose > 1) {
for (walk_dbi_t *dbi = walk.dbi; dbi < ARRAY_END(walk.dbi) && dbi->name;
++dbi) {
print(" %s: subtotal %" PRIu64, dbi->name, dbi->pages.total);
for (walk_dbi_t *dbi = walk.dbi;
dbi < ARRAY_END(walk.dbi) && dbi->name.iov_base; ++dbi) {
print(" %s: subtotal %" PRIu64, sdb_name(&dbi->name),
dbi->pages.total);
if (dbi->pages.other && dbi->pages.other != dbi->pages.total)
print(", other %" PRIu64, dbi->pages.other);
if (dbi->pages.branch)
@ -1563,14 +1691,15 @@ int main(int argc, char *argv[]) {
(total_page_bytes - walk.total_payload_bytes) * 100.0 /
total_page_bytes);
if (verbose > 2) {
for (walk_dbi_t *dbi = walk.dbi; dbi < ARRAY_END(walk.dbi) && dbi->name;
++dbi)
for (walk_dbi_t *dbi = walk.dbi;
dbi < ARRAY_END(walk.dbi) && dbi->name.iov_base; ++dbi)
if (dbi->pages.total) {
uint64_t dbi_bytes = dbi->pages.total * envinfo.mi_dxb_pagesize;
print(" %s: subtotal %" PRIu64 " bytes (%.1f%%),"
" payload %" PRIu64 " (%.1f%%), unused %" PRIu64 " (%.1f%%)",
dbi->name, dbi_bytes, dbi_bytes * 100.0 / total_page_bytes,
dbi->payload_bytes, dbi->payload_bytes * 100.0 / dbi_bytes,
sdb_name(&dbi->name), dbi_bytes,
dbi_bytes * 100.0 / total_page_bytes, dbi->payload_bytes,
dbi->payload_bytes * 100.0 / dbi_bytes,
dbi_bytes - dbi->payload_bytes,
(dbi_bytes - dbi->payload_bytes) * 100.0 / dbi_bytes);
if (dbi->pages.empty)
@ -1579,7 +1708,7 @@ int main(int argc, char *argv[]) {
print(", %" PRIu64 " bytes lost", dbi->lost_bytes);
print("\n");
} else
print(" %s: empty\n", dbi->name);
print(" %s: empty\n", sdb_name(&dbi->name));
}
print(" - summary: average fill %.1f%%",
walk.total_payload_bytes * 100.0 / total_page_bytes);
@ -1594,21 +1723,12 @@ int main(int argc, char *argv[]) {
fflush(nullptr);
}
if (!verbose)
print("Iterating DBIs...\n");
if (data_tree_problems) {
print("Skip processing %s since tree is corrupted (%u problems)\n", "@MAIN",
data_tree_problems);
problems_maindb = data_tree_problems;
} else
problems_maindb = process_db(~0u, /* MAIN_DBI */ nullptr, nullptr, false);
if (gc_tree_problems) {
print("Skip processing %s since tree is corrupted (%u problems)\n", "@GC",
gc_tree_problems);
print("Skip processing %s since %s is corrupted (%u problems)\n", "@GC",
"b-tree", gc_tree_problems);
problems_freedb = gc_tree_problems;
} else
problems_freedb = process_db(FREE_DBI, "@GC", handle_freedb, false);
problems_freedb = process_db(FREE_DBI, MDBX_PGWALK_GC, handle_freedb);
if (verbose) {
uint64_t value = envinfo.mi_mapsize / envinfo.mi_dxb_pagesize;
@ -1640,7 +1760,7 @@ int main(int argc, char *argv[]) {
print(", available %" PRIu64 " (%.1f%%)\n", value, value / percent);
}
if (problems_maindb == 0 && problems_freedb == 0) {
if ((problems_maindb = data_tree_problems) == 0 && problems_freedb == 0) {
if (!dont_traversal &&
(envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) {
if (walk.pgcount != alloc_pages - gc_pages) {
@ -1649,22 +1769,32 @@ int main(int argc, char *argv[]) {
walk.pgcount, alloc_pages - gc_pages);
}
if (unused_pages != gc_pages) {
error("gc pages mismatch (%" PRIu64 "(expected) != %" PRIu64 "(GC))\n",
error("GC pages mismatch (%" PRIu64 "(expected) != %" PRIu64 "(GC))\n",
unused_pages, gc_pages);
}
} else if (verbose) {
print(" - skip check used and gc pages (btree-traversal with "
print(" - skip check used and GC pages (btree-traversal with "
"monopolistic or read-write mode only)\n");
}
if (!process_db(MAIN_DBI, nullptr, handle_maindb, true)) {
problems_maindb = process_db(~0u, /* MAIN_DBI */ nullptr, nullptr);
if (problems_maindb == 0) {
print("Scanning %s for %s...\n", "@MAIN", "sub-database(s)");
if (!process_db(MAIN_DBI, nullptr, handle_maindb)) {
if (!userdb_count && verbose)
print(" - does not contain multiple databases\n");
}
} else {
print("Skip processing %s since %s is corrupted (%u problems)\n",
"sub-database(s)", "@MAIN", problems_maindb);
}
} else {
print("Skip processing %s since %s is corrupted (%u problems)\n", "@MAIN",
"b-tree", data_tree_problems);
}
if (rc == 0 && total_problems == 1 && problems_meta == 1 && !dont_traversal &&
(envflags & MDBX_RDONLY) == 0 && !only_subdb && stuck_meta < 0 &&
(envflags & MDBX_RDONLY) == 0 && !only_subdb.iov_base && stuck_meta < 0 &&
get_meta_txnid(meta_recent(true)) < envinfo.mi_recent_txnid) {
print("Perform sync-to-disk for make steady checkpoint at txn-id #%" PRIi64
"\n",
@ -1683,7 +1813,7 @@ int main(int argc, char *argv[]) {
}
}
if (turn_meta && stuck_meta >= 0 && !dont_traversal && !only_subdb &&
if (turn_meta && stuck_meta >= 0 && !dont_traversal && !only_subdb.iov_base &&
(envflags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == MDBX_EXCLUSIVE) {
const bool successful_check = (rc | total_problems | problems_meta) == 0;
if (successful_check || force_turn_meta) {

View File

@ -1,7 +1,7 @@
/* mdbx_copy.c - memory-mapped database backup tool */
/*
* Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -20,7 +20,7 @@
#pragma warning(disable : 4996) /* The POSIX name is deprecated... */
#endif /* _MSC_VER (warnings) */
#define xMDBX_TOOLS /* Avoid using internal mdbx_assert() */
#define xMDBX_TOOLS /* Avoid using internal eASSERT() */
#include "internals.h"
#if defined(_WIN32) || defined(_WIN64)
@ -44,11 +44,14 @@ static void signal_handler(int sig) {
#endif /* !WINDOWS */
static void usage(const char *prog) {
fprintf(stderr,
"usage: %s [-V] [-q] [-c] src_path [dest_path]\n"
fprintf(
stderr,
"usage: %s [-V] [-q] [-c] [-u|U] src_path [dest_path]\n"
" -V\t\tprint version and exit\n"
" -q\t\tbe quiet\n"
" -c\t\tenable compactification (skip unused pages)\n"
" -u\t\twarmup database before copying\n"
" -U\t\twarmup and try lock database pages in memory before copying\n"
" src_path\tsource database\n"
" dest_path\tdestination (stdout if not specified)\n",
prog);
@ -62,6 +65,8 @@ int main(int argc, char *argv[]) {
unsigned flags = MDBX_RDONLY;
unsigned cpflags = 0;
bool quiet = false;
bool warmup = false;
MDBX_warmup_flags_t warmup_flags = MDBX_warmup_default;
for (; argc > 1 && argv[1][0] == '-'; argc--, argv++) {
if (argv[1][1] == 'n' && argv[1][2] == '\0')
@ -70,7 +75,13 @@ int main(int argc, char *argv[]) {
cpflags |= MDBX_CP_COMPACT;
else if (argv[1][1] == 'q' && argv[1][2] == '\0')
quiet = true;
else if ((argv[1][1] == 'h' && argv[1][2] == '\0') ||
else if (argv[1][1] == 'u' && argv[1][2] == '\0')
warmup = true;
else if (argv[1][1] == 'U' && argv[1][2] == '\0') {
warmup = true;
warmup_flags =
MDBX_warmup_force | MDBX_warmup_touchlimit | MDBX_warmup_lock;
} else if ((argv[1][1] == 'h' && argv[1][2] == '\0') ||
strcmp(argv[1], "--help") == 0)
usage(progname);
else if (argv[1][1] == 'V' && argv[1][2] == '\0') {
@ -120,7 +131,12 @@ int main(int argc, char *argv[]) {
if (rc == MDBX_SUCCESS)
rc = mdbx_env_open(env, argv[1], flags, 0);
if (rc == MDBX_SUCCESS) {
if (rc == MDBX_SUCCESS && warmup) {
act = "warming up";
rc = mdbx_env_warmup(env, nullptr, warmup_flags, 3600 * 65536);
}
if (!MDBX_IS_ERROR(rc)) {
act = "copying";
if (argc == 2) {
mdbx_filehandle_t fd;

View File

@ -1,10 +1,10 @@
/* mdbx_drop.c - memory-mapped database delete tool */
/*
* Copyright 2021 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2021-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
*
* Copyright 2016-2022 Howard Chu, Symas Corp.
* Copyright 2016-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -22,7 +22,7 @@
#pragma warning(disable : 4996) /* The POSIX name is deprecated... */
#endif /* _MSC_VER (warnings) */
#define xMDBX_TOOLS /* Avoid using internal mdbx_assert() */
#define xMDBX_TOOLS /* Avoid using internal eASSERT() */
#include "internals.h"
#include <ctype.h>

View File

@ -1,7 +1,7 @@
/* mdbx_dump.c - memory-mapped database dump tool */
/*
* Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -20,7 +20,7 @@
#pragma warning(disable : 4996) /* The POSIX name is deprecated... */
#endif /* _MSC_VER (warnings) */
#define xMDBX_TOOLS /* Avoid using internal mdbx_assert() */
#define xMDBX_TOOLS /* Avoid using internal eASSERT() */
#include "internals.h"
#include <ctype.h>
@ -66,7 +66,7 @@ static const char hexc[] = "0123456789abcdef";
static void dumpbyte(unsigned char c) {
putchar(hexc[c >> 4]);
putchar(hexc[c & 0xf]);
putchar(hexc[c & 15]);
}
static void text(MDBX_val *v) {
@ -186,10 +186,10 @@ static int dump_sdb(MDBX_txn *txn, MDBX_dbi dbi, char *name) {
error("mdbx_cursor_open", rc);
return rc;
}
if (MDBX_DEBUG > 0 && rescue) {
cursor->mc_flags |= C_SKIPORD;
if (rescue) {
cursor->mc_checking |= CC_SKIPORD;
if (cursor->mc_xcursor)
cursor->mc_xcursor->mx_cursor.mc_flags |= C_SKIPORD;
cursor->mc_xcursor->mx_cursor.mc_checking |= CC_SKIPORD;
}
while ((rc = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT)) ==
@ -217,8 +217,10 @@ static int dump_sdb(MDBX_txn *txn, MDBX_dbi dbi, char *name) {
}
static void usage(void) {
fprintf(stderr,
"usage: %s [-V] [-q] [-f file] [-l] [-p] [-r] [-a|-s subdb] "
fprintf(
stderr,
"usage: %s "
"[-V] [-q] [-f file] [-l] [-p] [-r] [-a|-s subdb] [-u|U] "
"dbpath\n"
" -V\t\tprint version and exit\n"
" -q\t\tbe quiet\n"
@ -228,6 +230,8 @@ static void usage(void) {
" -r\t\trescue mode (ignore errors to dump corrupted DB)\n"
" -a\t\tdump main DB and all subDBs\n"
" -s name\tdump only the specified named subDB\n"
" -u\t\twarmup database before dumping\n"
" -U\t\twarmup and try lock database pages in memory before dumping\n"
" \t\tby default dump only the main DB\n",
prog);
exit(EXIT_FAILURE);
@ -250,11 +254,14 @@ int main(int argc, char *argv[]) {
char *subname = nullptr, *buf4free = nullptr;
unsigned envflags = 0;
bool alldbs = false, list = false;
bool warmup = false;
MDBX_warmup_flags_t warmup_flags = MDBX_warmup_default;
if (argc < 2)
usage();
while ((i = getopt(argc, argv,
"uU"
"a"
"f:"
"l"
@ -311,6 +318,14 @@ int main(int argc, char *argv[]) {
case 'r':
rescue = true;
break;
case 'u':
warmup = true;
break;
case 'U':
warmup = true;
warmup_flags =
MDBX_warmup_force | MDBX_warmup_touchlimit | MDBX_warmup_lock;
break;
default:
usage();
}
@ -356,12 +371,22 @@ int main(int argc, char *argv[]) {
rc = mdbx_env_open(
env, envname,
envflags | (rescue ? MDBX_RDONLY | MDBX_EXCLUSIVE : MDBX_RDONLY), 0);
envflags | (rescue ? MDBX_RDONLY | MDBX_EXCLUSIVE | MDBX_VALIDATION
: MDBX_RDONLY),
0);
if (unlikely(rc != MDBX_SUCCESS)) {
error("mdbx_env_open", rc);
goto env_close;
}
if (warmup) {
rc = mdbx_env_warmup(env, nullptr, warmup_flags, 3600 * 65536);
if (MDBX_IS_ERROR(rc)) {
error("mdbx_env_warmup", rc);
goto env_close;
}
}
rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn);
if (unlikely(rc != MDBX_SUCCESS)) {
error("mdbx_txn_begin", rc);
@ -383,10 +408,10 @@ int main(int argc, char *argv[]) {
error("mdbx_cursor_open", rc);
goto txn_abort;
}
if (MDBX_DEBUG > 0 && rescue) {
cursor->mc_flags |= C_SKIPORD;
if (rescue) {
cursor->mc_checking |= CC_SKIPORD;
if (cursor->mc_xcursor)
cursor->mc_xcursor->mx_cursor.mc_flags |= C_SKIPORD;
cursor->mc_xcursor->mx_cursor.mc_checking |= CC_SKIPORD;
}
bool have_raw = false;
@ -401,7 +426,7 @@ int main(int argc, char *argv[]) {
if (memchr(key.iov_base, '\0', key.iov_len))
continue;
subname = mdbx_realloc(buf4free, key.iov_len + 1);
subname = osal_realloc(buf4free, key.iov_len + 1);
if (!subname) {
rc = MDBX_ENOMEM;
break;

View File

@ -1,7 +1,7 @@
/* mdbx_load.c - memory-mapped database load tool */
/*
* Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -20,7 +20,7 @@
#pragma warning(disable : 4996) /* The POSIX name is deprecated... */
#endif /* _MSC_VER (warnings) */
#define xMDBX_TOOLS /* Avoid using internal mdbx_assert() */
#define xMDBX_TOOLS /* Avoid using internal eASSERT() */
#include "internals.h"
#include <ctype.h>
@ -213,7 +213,7 @@ static int readhdr(void) {
if (str) {
if (*str) {
free(subname);
subname = mdbx_strdup(str);
subname = osal_strdup(str);
if (!subname) {
if (!quiet)
perror("strdup()");
@ -421,7 +421,7 @@ __hot static int readline(MDBX_val *out, MDBX_val *buf) {
/* Is buffer too short? */
while (c1[len - 1] != '\n') {
buf->iov_base = mdbx_realloc(buf->iov_base, buf->iov_len * 2);
buf->iov_base = osal_realloc(buf->iov_base, buf->iov_len * 2);
if (!buf->iov_base) {
if (!quiet)
fprintf(stderr,
@ -560,7 +560,7 @@ int main(int argc, char *argv[]) {
envflags |= MDBX_NOSUBDIR;
break;
case 's':
subname = mdbx_strdup(optarg);
subname = osal_strdup(optarg);
break;
case 'N':
putflags |= MDBX_NOOVERWRITE | MDBX_NODUPDATA;
@ -606,7 +606,7 @@ int main(int argc, char *argv[]) {
fflush(nullptr);
dbuf.iov_len = 4096;
dbuf.iov_base = mdbx_malloc(dbuf.iov_len);
dbuf.iov_base = osal_malloc(dbuf.iov_len);
if (!dbuf.iov_base) {
rc = MDBX_ENOMEM;
error("value-buffer", rc);
@ -673,7 +673,7 @@ int main(int argc, char *argv[]) {
goto env_close;
}
kbuf.iov_len = mdbx_env_get_maxvalsize_ex(env, 0) + 1;
kbuf.iov_len = mdbx_env_get_maxvalsize_ex(env, 0) + (size_t)1;
if (kbuf.iov_len >= INTPTR_MAX / 2) {
if (!quiet)
fprintf(stderr, "mdbx_env_get_maxkeysize() failed, returns %zu\n",

View File

@ -1,7 +1,7 @@
/* mdbx_stat.c - memory-mapped database status tool */
/*
* Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -20,7 +20,7 @@
#pragma warning(disable : 4996) /* The POSIX name is deprecated... */
#endif /* _MSC_VER (warnings) */
#define xMDBX_TOOLS /* Avoid using internal mdbx_assert() */
#define xMDBX_TOOLS /* Avoid using internal eASSERT() */
#include "internals.h"
#if defined(_WIN32) || defined(_WIN64)
@ -256,6 +256,17 @@ int main(int argc, char *argv[]) {
printf(" WOP: %8" PRIu64
"\t// number of explicit write operations (not a pages) to a disk\n",
mei.mi_pgop_stat.wops);
printf(" PreFault: %8" PRIu64
"\t// number of prefault write operations (not a pages)\n",
mei.mi_pgop_stat.prefault);
printf(" mInCore: %8" PRIu64 "\t// number of mincore() calls\n",
mei.mi_pgop_stat.mincore);
printf(" mSync: %8" PRIu64
"\t// number of explicit msync-to-disk operations (not a pages)\n",
mei.mi_pgop_stat.msync);
printf(" fSync: %8" PRIu64
"\t// number of explicit fsync-to-disk operations (not a pages)\n",
mei.mi_pgop_stat.fsync);
}
if (envinfo) {
@ -469,13 +480,13 @@ int main(int argc, char *argv[]) {
MDBX_dbi subdbi;
if (memchr(key.iov_base, '\0', key.iov_len))
continue;
subname = mdbx_malloc(key.iov_len + 1);
subname = osal_malloc(key.iov_len + 1);
memcpy(subname, key.iov_base, key.iov_len);
subname[key.iov_len] = '\0';
rc = mdbx_dbi_open(txn, subname, MDBX_DB_ACCEDE, &subdbi);
if (rc == MDBX_SUCCESS)
printf("Status of %s\n", subname);
mdbx_free(subname);
osal_free(subname);
if (unlikely(rc != MDBX_SUCCESS)) {
if (rc == MDBX_INCOMPATIBLE)
continue;

View File

@ -40,6 +40,8 @@
#define MDBX_ENV_CHECKPID 1
#endif
#define MDBX_ENV_CHECKPID_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_ENV_CHECKPID)
#elif !(MDBX_ENV_CHECKPID == 0 || MDBX_ENV_CHECKPID == 1)
#error MDBX_ENV_CHECKPID must be defined as 0 or 1
#else
#define MDBX_ENV_CHECKPID_CONFIG MDBX_STRINGIFY(MDBX_ENV_CHECKPID)
#endif /* MDBX_ENV_CHECKPID */
@ -49,6 +51,8 @@
#ifndef MDBX_TXN_CHECKOWNER
#define MDBX_TXN_CHECKOWNER 1
#define MDBX_TXN_CHECKOWNER_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_TXN_CHECKOWNER)
#elif !(MDBX_TXN_CHECKOWNER == 0 || MDBX_TXN_CHECKOWNER == 1)
#error MDBX_TXN_CHECKOWNER must be defined as 0 or 1
#else
#define MDBX_TXN_CHECKOWNER_CONFIG MDBX_STRINGIFY(MDBX_TXN_CHECKOWNER)
#endif /* MDBX_TXN_CHECKOWNER */
@ -62,6 +66,8 @@
#define MDBX_TRUST_RTC 1
#endif
#define MDBX_TRUST_RTC_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_TRUST_RTC)
#elif !(MDBX_TRUST_RTC == 0 || MDBX_TRUST_RTC == 1)
#error MDBX_TRUST_RTC must be defined as 0 or 1
#else
#define MDBX_TRUST_RTC_CONFIG MDBX_STRINGIFY(MDBX_TRUST_RTC)
#endif /* MDBX_TRUST_RTC */
@ -73,6 +79,13 @@
#error MDBX_ENABLE_REFUND must be defined as 0 or 1
#endif /* MDBX_ENABLE_REFUND */
/** Controls profiling of GC search and updates. */
#ifndef MDBX_ENABLE_PROFGC
#define MDBX_ENABLE_PROFGC 0
#elif !(MDBX_ENABLE_PROFGC == 0 || MDBX_ENABLE_PROFGC == 1)
#error MDBX_ENABLE_PROFGC must be defined as 0 or 1
#endif /* MDBX_ENABLE_PROFGC */
/** Controls gathering statistics for page operations. */
#ifndef MDBX_ENABLE_PGOP_STAT
#define MDBX_ENABLE_PGOP_STAT 1
@ -80,7 +93,32 @@
#error MDBX_ENABLE_PGOP_STAT must be defined as 0 or 1
#endif /* MDBX_ENABLE_PGOP_STAT */
/** Controls use of POSIX madvise() hints and friends. */
/** Controls using Unix' mincore() to determine whether DB-pages
* are resident in memory.
*
* When not predefined, defaults to 1 only if MDBX_ENABLE_PREFAULT is non-zero
* and either MINCORE_INCORE is defined or the target is not Windows;
* otherwise defaults to 0. A predefined value other than 0 or 1 is rejected
* at compile time.
*
* NOTE(review): MDBX_ENABLE_PREFAULT is not defined in the visible part of
* this header; an identifier that is not a macro evaluates to 0 inside #if,
* which would make the default 0 everywhere - confirm it is defined before
* this point. */
#ifndef MDBX_ENABLE_MINCORE
#if MDBX_ENABLE_PREFAULT && \
(defined(MINCORE_INCORE) || !(defined(_WIN32) || defined(_WIN64)))
#define MDBX_ENABLE_MINCORE 1
#else
#define MDBX_ENABLE_MINCORE 0
#endif
#elif !(MDBX_ENABLE_MINCORE == 0 || MDBX_ENABLE_MINCORE == 1)
#error MDBX_ENABLE_MINCORE must be defined as 0 or 1
#endif /* MDBX_ENABLE_MINCORE */
/** Enables chunking of a long list of retired pages during the commit of huge
* transactions, to avoid using sequences of pages. */
#ifndef MDBX_ENABLE_BIGFOOT
#if MDBX_WORDBITS >= 64 || defined(DOXYGEN)
#define MDBX_ENABLE_BIGFOOT 1
#else
#define MDBX_ENABLE_BIGFOOT 0
#endif
#elif !(MDBX_ENABLE_BIGFOOT == 0 || MDBX_ENABLE_BIGFOOT == 1)
#error MDBX_ENABLE_BIGFOOT must be defined as 0 or 1
#endif /* MDBX_ENABLE_BIGFOOT */
/** Controls using of POSIX' madvise() and/or similar hints. */
#ifndef MDBX_ENABLE_MADVISE
#define MDBX_ENABLE_MADVISE 1
#elif !(MDBX_ENABLE_MADVISE == 0 || MDBX_ENABLE_MADVISE == 1)
@ -89,11 +127,11 @@
/** Disable some checks to reduce an overhead and detection probability of
* database corruption to a values closer to the LMDB. */
#ifndef MDBX_DISABLE_PAGECHECKS
#define MDBX_DISABLE_PAGECHECKS 0
#elif !(MDBX_DISABLE_PAGECHECKS == 0 || MDBX_DISABLE_PAGECHECKS == 1)
#error MDBX_DISABLE_PAGECHECKS must be defined as 0 or 1
#endif /* MDBX_DISABLE_PAGECHECKS */
#ifndef MDBX_DISABLE_VALIDATION
#define MDBX_DISABLE_VALIDATION 0
#elif !(MDBX_DISABLE_VALIDATION == 0 || MDBX_DISABLE_VALIDATION == 1)
#error MDBX_DISABLE_VALIDATION must be defined as 0 or 1
#endif /* MDBX_DISABLE_VALIDATION */
#ifndef MDBX_PNL_PREALLOC_FOR_RADIXSORT
#define MDBX_PNL_PREALLOC_FOR_RADIXSORT 1
@ -109,23 +147,22 @@
#error MDBX_DPL_PREALLOC_FOR_RADIXSORT must be defined as 0 or 1
#endif /* MDBX_DPL_PREALLOC_FOR_RADIXSORT */
/** Basically, this build-option is for TODO. Guess it should be replaced
* with MDBX_ENABLE_WRITEMAP_SPILLING with the three variants:
* 0/OFF = Don't track dirty pages at all and don't spilling ones.
* This should be by-default on Linux and may-be other systems
* (not sure: Darwin/OSX, FreeBSD, Windows 10) where kernel provides
* properly LRU tracking and async writing on-demand.
* 1/ON = Lite tracking of dirty pages but with LRU labels and explicit
* spilling with msync(MS_ASYNC). */
#ifndef MDBX_FAKE_SPILL_WRITEMAP
#if defined(__linux__) || defined(__gnu_linux__)
#define MDBX_FAKE_SPILL_WRITEMAP 1 /* msync(MS_ASYNC) is no-op on Linux */
/** Controls dirty pages tracking, spilling and persisting in MDBX_WRITEMAP
* mode. 0/OFF = Don't track dirty pages at all, don't spill them, and use
* msync() to persist data. This is the default on Linux and other systems where
* the kernel provides proper LRU tracking and effective on-demand flushing.
* 1/ON = Track dirty pages with LRU labels for spilling and explicitly
* persist them by write(). This may be reasonable for systems with low
* performance of msync() and/or LRU tracking. */
#ifndef MDBX_AVOID_MSYNC
#if defined(_WIN32) || defined(_WIN64)
#define MDBX_AVOID_MSYNC 1
#else
#define MDBX_FAKE_SPILL_WRITEMAP 0
#define MDBX_AVOID_MSYNC 0
#endif
#elif !(MDBX_FAKE_SPILL_WRITEMAP == 0 || MDBX_FAKE_SPILL_WRITEMAP == 1)
#error MDBX_FAKE_SPILL_WRITEMAP must be defined as 0 or 1
#endif /* MDBX_FAKE_SPILL_WRITEMAP */
#elif !(MDBX_AVOID_MSYNC == 0 || MDBX_AVOID_MSYNC == 1)
#error MDBX_AVOID_MSYNC must be defined as 0 or 1
#endif /* MDBX_AVOID_MSYNC */
/** Controls sort order of internal page number lists.
* This is a mostly experimental/advanced option, not for regular MDBX users.
@ -182,6 +219,31 @@
#ifndef MDBX_HAVE_C11ATOMICS
#endif /* MDBX_HAVE_C11ATOMICS */
/** If defined as 1, enables the use of GCC's `__builtin_cpu_supports()`
* for runtime dispatching depending on the CPU's capabilities.
* \note Defining `MDBX_HAVE_BUILTIN_CPU_SUPPORTS` to `0` should be avoided
* unless building for a particular single-target platform, since on AMD64/x86
* this disables the dynamic choice (at runtime) of SSE2 / AVX2 / AVX512
* instructions with fallback to non-accelerated baseline code. */
#ifndef MDBX_HAVE_BUILTIN_CPU_SUPPORTS
#if defined(__APPLE__) || defined(BIONIC)
/* Never use any modern features on Apple's or Google's OSes
* because of a lot of trouble with compatibility and/or performance */
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
#elif defined(__e2k__)
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
#elif __has_builtin(__builtin_cpu_supports) || \
defined(__BUILTIN_CPU_SUPPORTS__) || \
(defined(__ia32__) && __GNUC_PREREQ(4, 8) && __GLIBC_PREREQ(2, 23))
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 1
#else
#define MDBX_HAVE_BUILTIN_CPU_SUPPORTS 0
#endif
#elif !(MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 0 || \
MDBX_HAVE_BUILTIN_CPU_SUPPORTS == 1)
#error MDBX_HAVE_BUILTIN_CPU_SUPPORTS must be defined as 0 or 1
#endif /* MDBX_HAVE_BUILTIN_CPU_SUPPORTS */
//------------------------------------------------------------------------------
/** Win32 File Locking API for \ref MDBX_LOCKING */
@ -248,6 +310,8 @@
#define MDBX_USE_OFDLOCKS 0
#endif
#define MDBX_USE_OFDLOCKS_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_USE_OFDLOCKS)
#elif !(MDBX_USE_OFDLOCKS == 0 || MDBX_USE_OFDLOCKS == 1)
#error MDBX_USE_OFDLOCKS must be defined as 0 or 1
#else
#define MDBX_USE_OFDLOCKS_CONFIG MDBX_STRINGIFY(MDBX_USE_OFDLOCKS)
#endif /* MDBX_USE_OFDLOCKS */
@ -261,6 +325,8 @@
#else
#define MDBX_USE_SENDFILE 0
#endif
#elif !(MDBX_USE_SENDFILE == 0 || MDBX_USE_SENDFILE == 1)
#error MDBX_USE_SENDFILE must be defined as 0 or 1
#endif /* MDBX_USE_SENDFILE */
/** Advanced: Using copy_file_range() syscall (autodetection by default). */
@ -270,6 +336,8 @@
#else
#define MDBX_USE_COPYFILERANGE 0
#endif
#elif !(MDBX_USE_COPYFILERANGE == 0 || MDBX_USE_COPYFILERANGE == 1)
#error MDBX_USE_COPYFILERANGE must be defined as 0 or 1
#endif /* MDBX_USE_COPYFILERANGE */
/** Advanced: Using sync_file_range() syscall (autodetection by default). */
@ -281,6 +349,8 @@
#else
#define MDBX_USE_SYNCFILERANGE 0
#endif
#elif !(MDBX_USE_SYNCFILERANGE == 0 || MDBX_USE_SYNCFILERANGE == 1)
#error MDBX_USE_SYNCFILERANGE must be defined as 0 or 1
#endif /* MDBX_USE_SYNCFILERANGE */
//------------------------------------------------------------------------------
@ -292,6 +362,9 @@
#else
#define MDBX_CPU_WRITEBACK_INCOHERENT 1
#endif
#elif !(MDBX_CPU_WRITEBACK_INCOHERENT == 0 || \
MDBX_CPU_WRITEBACK_INCOHERENT == 1)
#error MDBX_CPU_WRITEBACK_INCOHERENT must be defined as 0 or 1
#endif /* MDBX_CPU_WRITEBACK_INCOHERENT */
#ifndef MDBX_MMAP_INCOHERENT_FILE_WRITE
@ -300,6 +373,9 @@
#else
#define MDBX_MMAP_INCOHERENT_FILE_WRITE 0
#endif
#elif !(MDBX_MMAP_INCOHERENT_FILE_WRITE == 0 || \
MDBX_MMAP_INCOHERENT_FILE_WRITE == 1)
#error MDBX_MMAP_INCOHERENT_FILE_WRITE must be defined as 0 or 1
#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */
#ifndef MDBX_MMAP_INCOHERENT_CPU_CACHE
@ -312,8 +388,21 @@
/* LY: assume no relevant mmap/dcache issues. */
#define MDBX_MMAP_INCOHERENT_CPU_CACHE 0
#endif
#elif !(MDBX_MMAP_INCOHERENT_CPU_CACHE == 0 || \
MDBX_MMAP_INCOHERENT_CPU_CACHE == 1)
#error MDBX_MMAP_INCOHERENT_CPU_CACHE must be defined as 0 or 1
#endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */
/* MDBX_MMAP_USE_MS_ASYNC is a build-time 0/1 switch.
* When not predefined, it defaults to 1 if either
* MDBX_MMAP_INCOHERENT_FILE_WRITE or MDBX_MMAP_INCOHERENT_CPU_CACHE is
* non-zero, and to 0 otherwise. A predefined value other than 0 or 1 is
* rejected at compile time.
* NOTE(review): presumably selects the use of msync(MS_ASYNC) for mapped
* regions - confirm against the code that consumes this macro. */
#ifndef MDBX_MMAP_USE_MS_ASYNC
#if MDBX_MMAP_INCOHERENT_FILE_WRITE || MDBX_MMAP_INCOHERENT_CPU_CACHE
#define MDBX_MMAP_USE_MS_ASYNC 1
#else
#define MDBX_MMAP_USE_MS_ASYNC 0
#endif
#elif !(MDBX_MMAP_USE_MS_ASYNC == 0 || MDBX_MMAP_USE_MS_ASYNC == 1)
#error MDBX_MMAP_USE_MS_ASYNC must be defined as 0 or 1
#endif /* MDBX_MMAP_USE_MS_ASYNC */
#ifndef MDBX_64BIT_ATOMIC
#if MDBX_WORDBITS >= 64 || defined(DOXYGEN)
#define MDBX_64BIT_ATOMIC 1
@ -321,6 +410,8 @@
#define MDBX_64BIT_ATOMIC 0
#endif
#define MDBX_64BIT_ATOMIC_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_64BIT_ATOMIC)
#elif !(MDBX_64BIT_ATOMIC == 0 || MDBX_64BIT_ATOMIC == 1)
#error MDBX_64BIT_ATOMIC must be defined as 0 or 1
#else
#define MDBX_64BIT_ATOMIC_CONFIG MDBX_STRINGIFY(MDBX_64BIT_ATOMIC)
#endif /* MDBX_64BIT_ATOMIC */
@ -346,6 +437,8 @@
#endif
#elif defined(_MSC_VER) || defined(__APPLE__) || defined(DOXYGEN)
#define MDBX_64BIT_CAS 1
#elif !(MDBX_64BIT_CAS == 0 || MDBX_64BIT_CAS == 1)
#error MDBX_64BIT_CAS must be defined as 0 or 1
#else
#define MDBX_64BIT_CAS MDBX_64BIT_ATOMIC
#endif
@ -355,14 +448,11 @@
#endif /* MDBX_64BIT_CAS */
#ifndef MDBX_UNALIGNED_OK
#if defined(__ALIGNED__) || defined(__SANITIZE_UNDEFINED__)
#if defined(__ALIGNED__) || defined(__SANITIZE_UNDEFINED__) || \
defined(ENABLE_UBSAN)
#define MDBX_UNALIGNED_OK 0 /* no unaligned access allowed */
#elif defined(__ARM_FEATURE_UNALIGNED)
#define MDBX_UNALIGNED_OK 4 /* ok unaligned for 32-bit words */
#elif __CLANG_PREREQ(5, 0) || __GNUC_PREREQ(5, 0)
/* expecting an optimization will well done, also this
* hushes false-positives from UBSAN (undefined behaviour sanitizer) */
#define MDBX_UNALIGNED_OK 0
#elif defined(__e2k__) || defined(__elbrus__)
#if __iset__ > 4
#define MDBX_UNALIGNED_OK 8 /* ok unaligned for 64-bit words */
@ -371,6 +461,10 @@
#endif
#elif defined(__ia32__)
#define MDBX_UNALIGNED_OK 8 /* ok unaligned for 64-bit words */
#elif __CLANG_PREREQ(5, 0) || __GNUC_PREREQ(5, 0)
/* expecting the optimization will be done well; this also
* hushes false-positives from UBSAN (undefined behaviour sanitizer) */
#define MDBX_UNALIGNED_OK 0
#else
#define MDBX_UNALIGNED_OK 0 /* no unaligned access allowed */
#endif

1391
src/osal.c

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,7 @@
/* https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */
/*
* Copyright 2015-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2015-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -58,7 +58,7 @@
#include <sys/cachectl.h>
#endif
MDBX_MAYBE_UNUSED static __inline void mdbx_compiler_barrier(void) {
MDBX_MAYBE_UNUSED static __inline void osal_compiler_barrier(void) {
#if defined(__clang__) || defined(__GNUC__)
__asm__ __volatile__("" ::: "memory");
#elif defined(_MSC_VER)
@ -78,7 +78,7 @@ MDBX_MAYBE_UNUSED static __inline void mdbx_compiler_barrier(void) {
#endif
}
MDBX_MAYBE_UNUSED static __inline void mdbx_memory_barrier(void) {
MDBX_MAYBE_UNUSED static __inline void osal_memory_barrier(void) {
#ifdef MDBX_HAVE_C11ATOMICS
atomic_thread_fence(memory_order_seq_cst);
#elif defined(__ATOMIC_SEQ_CST)
@ -116,8 +116,8 @@ MDBX_MAYBE_UNUSED static __inline void mdbx_memory_barrier(void) {
#if defined(_WIN32) || defined(_WIN64)
#define HAVE_SYS_STAT_H
#define HAVE_SYS_TYPES_H
typedef HANDLE mdbx_thread_t;
typedef unsigned mdbx_thread_key_t;
typedef HANDLE osal_thread_t;
typedef unsigned osal_thread_key_t;
#define MAP_FAILED NULL
#define HIGH_DWORD(v) ((DWORD)((sizeof(v) > 4) ? ((uint64_t)(v) >> 32) : 0))
#define THREAD_CALL WINAPI
@ -125,8 +125,8 @@ typedef unsigned mdbx_thread_key_t;
typedef struct {
HANDLE mutex;
HANDLE event[2];
} mdbx_condpair_t;
typedef CRITICAL_SECTION mdbx_fastmutex_t;
} osal_condpair_t;
typedef CRITICAL_SECTION osal_fastmutex_t;
#if !defined(_MSC_VER) && !defined(__try)
/* *INDENT-OFF* */
@ -139,36 +139,36 @@ typedef CRITICAL_SECTION mdbx_fastmutex_t;
#if MDBX_WITHOUT_MSVC_CRT
#ifndef mdbx_malloc
static inline void *mdbx_malloc(size_t bytes) {
#ifndef osal_malloc
static inline void *osal_malloc(size_t bytes) {
return HeapAlloc(GetProcessHeap(), 0, bytes);
}
#endif /* mdbx_malloc */
#endif /* osal_malloc */
#ifndef mdbx_calloc
static inline void *mdbx_calloc(size_t nelem, size_t size) {
#ifndef osal_calloc
static inline void *osal_calloc(size_t nelem, size_t size) {
return HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, nelem * size);
}
#endif /* mdbx_calloc */
#endif /* osal_calloc */
#ifndef mdbx_realloc
static inline void *mdbx_realloc(void *ptr, size_t bytes) {
#ifndef osal_realloc
static inline void *osal_realloc(void *ptr, size_t bytes) {
return ptr ? HeapReAlloc(GetProcessHeap(), 0, ptr, bytes)
: HeapAlloc(GetProcessHeap(), 0, bytes);
}
#endif /* mdbx_realloc */
#endif /* osal_realloc */
#ifndef mdbx_free
static inline void mdbx_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); }
#endif /* mdbx_free */
#ifndef osal_free
static inline void osal_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); }
#endif /* osal_free */
#else /* MDBX_WITHOUT_MSVC_CRT */
#define mdbx_malloc malloc
#define mdbx_calloc calloc
#define mdbx_realloc realloc
#define mdbx_free free
#define mdbx_strdup _strdup
#define osal_malloc malloc
#define osal_calloc calloc
#define osal_realloc realloc
#define osal_free free
#define osal_strdup _strdup
#endif /* MDBX_WITHOUT_MSVC_CRT */
@ -180,26 +180,23 @@ static inline void mdbx_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); }
#define vsnprintf _vsnprintf /* ntdll */
#endif
size_t mdbx_mb2w(wchar_t *dst, size_t dst_n, const char *src, size_t src_n);
size_t mdbx_w2mb(char *dst, size_t dst_n, const wchar_t *src, size_t src_n);
#else /*----------------------------------------------------------------------*/
typedef pthread_t mdbx_thread_t;
typedef pthread_key_t mdbx_thread_key_t;
typedef pthread_t osal_thread_t;
typedef pthread_key_t osal_thread_key_t;
#define INVALID_HANDLE_VALUE (-1)
#define THREAD_CALL
#define THREAD_RESULT void *
typedef struct {
pthread_mutex_t mutex;
pthread_cond_t cond[2];
} mdbx_condpair_t;
typedef pthread_mutex_t mdbx_fastmutex_t;
#define mdbx_malloc malloc
#define mdbx_calloc calloc
#define mdbx_realloc realloc
#define mdbx_free free
#define mdbx_strdup strdup
} osal_condpair_t;
typedef pthread_mutex_t osal_fastmutex_t;
#define osal_malloc malloc
#define osal_calloc calloc
#define osal_realloc realloc
#define osal_free free
#define osal_strdup strdup
#endif /* Platform */
#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size)
@ -213,24 +210,30 @@ typedef pthread_mutex_t mdbx_fastmutex_t;
/*----------------------------------------------------------------------------*/
/* OS abstraction layer stuff */
MDBX_INTERNAL_VAR unsigned sys_pagesize;
MDBX_MAYBE_UNUSED MDBX_INTERNAL_VAR unsigned sys_pagesize_ln2,
sys_allocation_granularity;
/* Get the size of a memory page for the system.
* This is the basic size that the platform's memory manager uses, and is
* fundamental to the use of memory-mapped files. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_CONST_FUNCTION static __inline size_t
mdbx_syspagesize(void) {
#if defined(_WIN32) || defined(_WIN64)
SYSTEM_INFO si;
GetSystemInfo(&si);
return si.dwPageSize;
#else
return sysconf(_SC_PAGE_SIZE);
#endif
osal_syspagesize(void) {
assert(sys_pagesize > 0 && (sys_pagesize & (sys_pagesize - 1)) == 0);
return sys_pagesize;
}
typedef struct mdbx_mmap_param {
#if defined(_WIN32) || defined(_WIN64)
typedef wchar_t pathchar_t;
#define MDBX_PRIsPATH "ls"
#else
typedef char pathchar_t;
#define MDBX_PRIsPATH "s"
#endif
typedef struct osal_mmap {
union {
void *address;
uint8_t *dxb;
void *base;
struct MDBX_lockinfo *lck;
};
mdbx_filehandle_t fd;
@ -240,7 +243,7 @@ typedef struct mdbx_mmap_param {
#if defined(_WIN32) || defined(_WIN64)
HANDLE section; /* memory-mapped section handle */
#endif
} mdbx_mmap_t;
} osal_mmap_t;
typedef union bin128 {
__anonymous_struct_extension__ struct { uint64_t x, y; };
@ -248,28 +251,162 @@ typedef union bin128 {
} bin128_t;
#if defined(_WIN32) || defined(_WIN64)
typedef union MDBX_srwlock {
typedef union osal_srwlock {
__anonymous_struct_extension__ struct {
long volatile readerCount;
long volatile writerCount;
};
RTL_SRWLOCK native;
} MDBX_srwlock;
} osal_srwlock_t;
#endif /* Windows */
/* Compile-time selection of MDBX_HAVE_PWRITEV (0 or 1).
 * A value defined before this point is kept as-is; otherwise it is chosen by
 * the first matching branch below:
 *  - Windows: 0;
 *  - Android: 1 only when __ANDROID_API__ >= 24;
 *  - Apple/Mach: 1 only when the minimum required macOS version
 *    is at least 11.0;
 *  - elsewhere: 1 when _SC_IOV_MAX is defined or IOV_MAX is greater than 1;
 *  - otherwise: 0. */
#ifndef MDBX_HAVE_PWRITEV
#if defined(_WIN32) || defined(_WIN64)
#define MDBX_HAVE_PWRITEV 0
#elif defined(__ANDROID_API__)
#if __ANDROID_API__ < 24
#define MDBX_HAVE_PWRITEV 0
#else
#define MDBX_HAVE_PWRITEV 1
#endif
#elif defined(__APPLE__) || defined(__MACH__) || defined(_DARWIN_C_SOURCE)
#if defined(MAC_OS_X_VERSION_MIN_REQUIRED) && defined(MAC_OS_VERSION_11_0) && \
MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
/* FIXME: add checks for IOS versions, etc */
#define MDBX_HAVE_PWRITEV 1
#else
#define MDBX_HAVE_PWRITEV 0
#endif
#elif defined(_SC_IOV_MAX) || (defined(IOV_MAX) && IOV_MAX > 1)
#define MDBX_HAVE_PWRITEV 1
#else
#define MDBX_HAVE_PWRITEV 0
#endif
#endif /* MDBX_HAVE_PWRITEV */
/* One queued I/O item of the ioring.
 * The leading fields are platform-specific:
 *  - Windows: an OVERLAPPED structure;
 *  - others: the file offset, plus a scatter-gather element counter when
 *    MDBX_HAVE_PWRITEV is enabled.
 * The trailing union holds either a single buffer (MDBX_val) or, when
 * ior_sgv_element is defined, an array of scatter-gather elements declared
 * with 1 + ior_svg_gap4terminator entries.
 * NOTE(review): presumably the array is over-allocated and indexed beyond its
 * declared size by the implementation -- confirm against osal.c.
 * NOTE(review): the helper macro is spelled `svg` while its sibling is `sgv`;
 * the spelling is used consistently within this block. */
typedef struct ior_item {
#if defined(_WIN32) || defined(_WIN64)
OVERLAPPED ov;
/* one extra array element is reserved on Windows (the "terminator" gap) */
#define ior_svg_gap4terminator 1
#define ior_sgv_element FILE_SEGMENT_ELEMENT
#else
size_t offset;
#if MDBX_HAVE_PWRITEV
size_t sgvcnt;
#define ior_svg_gap4terminator 0
#define ior_sgv_element struct iovec
#endif /* MDBX_HAVE_PWRITEV */
#endif /* !Windows */
union {
MDBX_val single;
#if defined(ior_sgv_element)
ior_sgv_element sgv[1 + ior_svg_gap4terminator];
#endif /* ior_sgv_element */
};
} ior_item_t;
/* State of a write-batching "ioring" object.
 * slots_left and allocated are slot counters: osal_ioring_used() reports
 * their difference and osal_ioring_left() reports slots_left.
 * The platform-specific part also defines two accessor macros,
 * ior_last_sgvcnt() and ior_last_bytes(), which read the values either from
 * the ring itself or from the given item depending on the build:
 *  - Windows: both are kept in the ring (last_sgvcnt, last_bytes);
 *  - with MDBX_HAVE_PWRITEV: the count comes from the item's sgvcnt, the byte
 *    total from the ring's last_bytes;
 *  - otherwise: the count is the constant 1 and the byte total is the length
 *    of the item's single buffer.
 * NOTE(review): the roles of last/pool/boundary (presumably the current item,
 * the item storage and its end) are not visible here -- confirm in osal.c. */
typedef struct osal_ioring {
unsigned slots_left;
unsigned allocated;
#if defined(_WIN32) || defined(_WIN64)
#define IOR_STATE_LOCKED 1
HANDLE overlapped_fd;
unsigned pagesize;
unsigned last_sgvcnt;
size_t last_bytes;
uint8_t direct, state, pagesize_ln2;
unsigned event_stack;
HANDLE *event_pool;
volatile LONG async_waiting;
volatile LONG async_completed;
HANDLE async_done;
#define ior_last_sgvcnt(ior, item) (ior)->last_sgvcnt
#define ior_last_bytes(ior, item) (ior)->last_bytes
#elif MDBX_HAVE_PWRITEV
unsigned last_bytes;
#define ior_last_sgvcnt(ior, item) (item)->sgvcnt
#define ior_last_bytes(ior, item) (ior)->last_bytes
#else
#define ior_last_sgvcnt(ior, item) (1)
#define ior_last_bytes(ior, item) (item)->single.iov_len
#endif /* !Windows */
ior_item_t *last;
ior_item_t *pool;
char *boundary;
} osal_ioring_t;
#ifndef __cplusplus
/* Actually this is not a real ioring yet, but it is on the way to one. */
MDBX_INTERNAL_FUNC int osal_ioring_create(osal_ioring_t *
#if defined(_WIN32) || defined(_WIN64)
,
bool enable_direct,
mdbx_filehandle_t overlapped_fd
#endif /* Windows */
);
MDBX_INTERNAL_FUNC int osal_ioring_resize(osal_ioring_t *, size_t items);
MDBX_INTERNAL_FUNC void osal_ioring_destroy(osal_ioring_t *);
MDBX_INTERNAL_FUNC void osal_ioring_reset(osal_ioring_t *);
MDBX_INTERNAL_FUNC int osal_ioring_add(osal_ioring_t *ctx, const size_t offset,
void *data, const size_t bytes);
typedef struct osal_ioring_write_result {
int err;
unsigned wops;
} osal_ioring_write_result_t;
MDBX_INTERNAL_FUNC osal_ioring_write_result_t
osal_ioring_write(osal_ioring_t *ior, mdbx_filehandle_t fd);
typedef struct iov_ctx iov_ctx_t;
MDBX_INTERNAL_FUNC void osal_ioring_walk(
osal_ioring_t *ior, iov_ctx_t *ctx,
void (*callback)(iov_ctx_t *ctx, size_t offset, void *data, size_t bytes));
/* Returns the value of the ring's `slots_left` counter. */
MDBX_MAYBE_UNUSED static inline unsigned
osal_ioring_left(const osal_ioring_t *ior) {
  const unsigned spare = ior->slots_left;
  return spare;
}
/* Returns the difference between the ring's `allocated` and `slots_left`
 * counters, i.e. the number of slots that are not left. */
MDBX_MAYBE_UNUSED static inline unsigned
osal_ioring_used(const osal_ioring_t *ior) {
  const unsigned total = ior->allocated;
  const unsigned spare = ior->slots_left;
  return total - spare;
}
/* Makes sure the ring has room for the requested workload.
 *
 * The requested item count is raised to at least 32; on Windows, when the
 * ring's `direct` flag is set, it is also raised to at least the number of
 * whole pages in `bytes` (bytes >> pagesize_ln2). The result is then capped
 * at 65536.
 *
 * Returns MDBX_SUCCESS when `allocated` already covers that count, otherwise
 * the result of osal_ioring_resize() for that count. */
MDBX_MAYBE_UNUSED static inline int
osal_ioring_prepare(osal_ioring_t *ior, size_t items, size_t bytes) {
  size_t wanted = items;
  if (wanted <= 32)
    wanted = 32;
#if defined(_WIN32) || defined(_WIN64)
  if (ior->direct) {
    const size_t pages = bytes >> ior->pagesize_ln2;
    if (wanted <= pages)
      wanted = pages;
  }
#else
  (void)bytes;
#endif
  if (wanted >= 65536)
    wanted = 65536;
  return likely(ior->allocated >= wanted) ? MDBX_SUCCESS
                                          : osal_ioring_resize(ior, wanted);
}
/*----------------------------------------------------------------------------*/
/* libc compatibility stuff */
#if (!defined(__GLIBC__) && __GLIBC_PREREQ(2, 1)) && \
(defined(_GNU_SOURCE) || defined(_BSD_SOURCE))
#define mdbx_asprintf asprintf
#define mdbx_vasprintf vasprintf
#define osal_asprintf asprintf
#define osal_vasprintf vasprintf
#else
MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC
MDBX_PRINTF_ARGS(2, 3) int mdbx_asprintf(char **strp, const char *fmt, ...);
MDBX_INTERNAL_FUNC int mdbx_vasprintf(char **strp, const char *fmt, va_list ap);
MDBX_PRINTF_ARGS(2, 3) int osal_asprintf(char **strp, const char *fmt, ...);
MDBX_INTERNAL_FUNC int osal_vasprintf(char **strp, const char *fmt, va_list ap);
#endif
#if !defined(MADV_DODUMP) && defined(MADV_CORE)
@ -280,12 +417,14 @@ MDBX_INTERNAL_FUNC int mdbx_vasprintf(char **strp, const char *fmt, va_list ap);
#define MADV_DONTDUMP MADV_NOCORE
#endif /* MADV_NOCORE -> MADV_DONTDUMP */
MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC void mdbx_osal_jitter(bool tiny);
MDBX_MAYBE_UNUSED static __inline void mdbx_jitter4testing(bool tiny);
MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC void osal_jitter(bool tiny);
MDBX_MAYBE_UNUSED static __inline void jitter4testing(bool tiny);
/* max bytes to write in one call */
#if defined(_WIN32) || defined(_WIN64)
#define MAX_WRITE UINT32_C(0x01000000)
#if defined(_WIN64)
#define MAX_WRITE UINT32_C(0x10000000)
#elif defined(_WIN32)
#define MAX_WRITE UINT32_C(0x04000000)
#else
#define MAX_WRITE UINT32_C(0x3f000000)
@ -332,15 +471,15 @@ MDBX_MAYBE_UNUSED static __inline void mdbx_jitter4testing(bool tiny);
#endif
#if defined(__linux__) || defined(__gnu_linux__)
MDBX_INTERNAL_VAR uint32_t mdbx_linux_kernel_version;
MDBX_INTERNAL_VAR uint32_t linux_kernel_version;
MDBX_INTERNAL_VAR bool mdbx_RunningOnWSL1 /* Windows Subsystem 1 for Linux */;
#endif /* Linux */
#ifndef mdbx_strdup
LIBMDBX_API char *mdbx_strdup(const char *str);
#ifndef osal_strdup
LIBMDBX_API char *osal_strdup(const char *str);
#endif
MDBX_MAYBE_UNUSED static __inline int mdbx_get_errno(void) {
MDBX_MAYBE_UNUSED static __inline int osal_get_errno(void) {
#if defined(_WIN32) || defined(_WIN64)
DWORD rc = GetLastError();
#else
@ -349,84 +488,101 @@ MDBX_MAYBE_UNUSED static __inline int mdbx_get_errno(void) {
return rc;
}
#ifndef mdbx_memalign_alloc
MDBX_INTERNAL_FUNC int mdbx_memalign_alloc(size_t alignment, size_t bytes,
#ifndef osal_memalign_alloc
MDBX_INTERNAL_FUNC int osal_memalign_alloc(size_t alignment, size_t bytes,
void **result);
#endif
#ifndef mdbx_memalign_free
MDBX_INTERNAL_FUNC void mdbx_memalign_free(void *ptr);
#ifndef osal_memalign_free
MDBX_INTERNAL_FUNC void osal_memalign_free(void *ptr);
#endif
MDBX_INTERNAL_FUNC int mdbx_condpair_init(mdbx_condpair_t *condpair);
MDBX_INTERNAL_FUNC int mdbx_condpair_lock(mdbx_condpair_t *condpair);
MDBX_INTERNAL_FUNC int mdbx_condpair_unlock(mdbx_condpair_t *condpair);
MDBX_INTERNAL_FUNC int mdbx_condpair_signal(mdbx_condpair_t *condpair,
MDBX_INTERNAL_FUNC int osal_condpair_init(osal_condpair_t *condpair);
MDBX_INTERNAL_FUNC int osal_condpair_lock(osal_condpair_t *condpair);
MDBX_INTERNAL_FUNC int osal_condpair_unlock(osal_condpair_t *condpair);
MDBX_INTERNAL_FUNC int osal_condpair_signal(osal_condpair_t *condpair,
bool part);
MDBX_INTERNAL_FUNC int mdbx_condpair_wait(mdbx_condpair_t *condpair, bool part);
MDBX_INTERNAL_FUNC int mdbx_condpair_destroy(mdbx_condpair_t *condpair);
MDBX_INTERNAL_FUNC int osal_condpair_wait(osal_condpair_t *condpair, bool part);
MDBX_INTERNAL_FUNC int osal_condpair_destroy(osal_condpair_t *condpair);
MDBX_INTERNAL_FUNC int mdbx_fastmutex_init(mdbx_fastmutex_t *fastmutex);
MDBX_INTERNAL_FUNC int mdbx_fastmutex_acquire(mdbx_fastmutex_t *fastmutex);
MDBX_INTERNAL_FUNC int mdbx_fastmutex_release(mdbx_fastmutex_t *fastmutex);
MDBX_INTERNAL_FUNC int mdbx_fastmutex_destroy(mdbx_fastmutex_t *fastmutex);
MDBX_INTERNAL_FUNC int osal_fastmutex_init(osal_fastmutex_t *fastmutex);
MDBX_INTERNAL_FUNC int osal_fastmutex_acquire(osal_fastmutex_t *fastmutex);
MDBX_INTERNAL_FUNC int osal_fastmutex_release(osal_fastmutex_t *fastmutex);
MDBX_INTERNAL_FUNC int osal_fastmutex_destroy(osal_fastmutex_t *fastmutex);
MDBX_INTERNAL_FUNC int mdbx_pwritev(mdbx_filehandle_t fd, struct iovec *iov,
int iovcnt, uint64_t offset,
size_t expected_written);
MDBX_INTERNAL_FUNC int mdbx_pread(mdbx_filehandle_t fd, void *buf, size_t count,
MDBX_INTERNAL_FUNC int osal_pwritev(mdbx_filehandle_t fd, struct iovec *iov,
size_t sgvcnt, uint64_t offset);
MDBX_INTERNAL_FUNC int osal_pread(mdbx_filehandle_t fd, void *buf, size_t count,
uint64_t offset);
MDBX_INTERNAL_FUNC int mdbx_pwrite(mdbx_filehandle_t fd, const void *buf,
MDBX_INTERNAL_FUNC int osal_pwrite(mdbx_filehandle_t fd, const void *buf,
size_t count, uint64_t offset);
MDBX_INTERNAL_FUNC int mdbx_write(mdbx_filehandle_t fd, const void *buf,
MDBX_INTERNAL_FUNC int osal_write(mdbx_filehandle_t fd, const void *buf,
size_t count);
MDBX_INTERNAL_FUNC int
mdbx_thread_create(mdbx_thread_t *thread,
osal_thread_create(osal_thread_t *thread,
THREAD_RESULT(THREAD_CALL *start_routine)(void *),
void *arg);
MDBX_INTERNAL_FUNC int mdbx_thread_join(mdbx_thread_t thread);
MDBX_INTERNAL_FUNC int osal_thread_join(osal_thread_t thread);
enum mdbx_syncmode_bits {
enum osal_syncmode_bits {
MDBX_SYNC_NONE = 0,
MDBX_SYNC_DATA = 1,
MDBX_SYNC_SIZE = 2,
MDBX_SYNC_IODQ = 4
MDBX_SYNC_KICK = 1,
MDBX_SYNC_DATA = 2,
MDBX_SYNC_SIZE = 4,
MDBX_SYNC_IODQ = 8
};
MDBX_INTERNAL_FUNC int mdbx_fsync(mdbx_filehandle_t fd,
const enum mdbx_syncmode_bits mode_bits);
MDBX_INTERNAL_FUNC int mdbx_ftruncate(mdbx_filehandle_t fd, uint64_t length);
MDBX_INTERNAL_FUNC int mdbx_fseek(mdbx_filehandle_t fd, uint64_t pos);
MDBX_INTERNAL_FUNC int mdbx_filesize(mdbx_filehandle_t fd, uint64_t *length);
MDBX_INTERNAL_FUNC int osal_fsync(mdbx_filehandle_t fd,
const enum osal_syncmode_bits mode_bits);
MDBX_INTERNAL_FUNC int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length);
MDBX_INTERNAL_FUNC int osal_fseek(mdbx_filehandle_t fd, uint64_t pos);
MDBX_INTERNAL_FUNC int osal_filesize(mdbx_filehandle_t fd, uint64_t *length);
enum mdbx_openfile_purpose {
MDBX_OPEN_DXB_READ = 0,
MDBX_OPEN_DXB_LAZY = 1,
MDBX_OPEN_DXB_DSYNC = 2,
MDBX_OPEN_LCK = 3,
MDBX_OPEN_COPY = 4,
MDBX_OPEN_DELETE = 5
enum osal_openfile_purpose {
MDBX_OPEN_DXB_READ,
MDBX_OPEN_DXB_LAZY,
MDBX_OPEN_DXB_DSYNC,
#if defined(_WIN32) || defined(_WIN64)
MDBX_OPEN_DXB_OVERLAPPED,
MDBX_OPEN_DXB_OVERLAPPED_DIRECT,
#endif /* Windows */
MDBX_OPEN_LCK,
MDBX_OPEN_COPY,
MDBX_OPEN_DELETE
};
MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
const MDBX_env *env, const char *pathname,
/* Tells whether `c` is a directory separator: '/' on every platform,
 * and additionally '\\' on Windows. */
MDBX_MAYBE_UNUSED static __inline bool osal_isdirsep(pathchar_t c) {
#if defined(_WIN32) || defined(_WIN64)
  if (c == '\\')
    return true;
#endif
  return c == '/';
}
MDBX_INTERNAL_FUNC bool osal_pathequal(const pathchar_t *l, const pathchar_t *r,
size_t len);
MDBX_INTERNAL_FUNC pathchar_t *osal_fileext(const pathchar_t *pathname,
size_t len);
MDBX_INTERNAL_FUNC int osal_fileexists(const pathchar_t *pathname);
MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
const MDBX_env *env,
const pathchar_t *pathname,
mdbx_filehandle_t *fd,
mdbx_mode_t unix_mode_bits);
MDBX_INTERNAL_FUNC int mdbx_closefile(mdbx_filehandle_t fd);
MDBX_INTERNAL_FUNC int mdbx_removefile(const char *pathname);
MDBX_INTERNAL_FUNC int mdbx_removedirectory(const char *pathname);
MDBX_INTERNAL_FUNC int mdbx_is_pipe(mdbx_filehandle_t fd);
MDBX_INTERNAL_FUNC int mdbx_lockfile(mdbx_filehandle_t fd, bool wait);
MDBX_INTERNAL_FUNC int osal_closefile(mdbx_filehandle_t fd);
MDBX_INTERNAL_FUNC int osal_removefile(const pathchar_t *pathname);
MDBX_INTERNAL_FUNC int osal_removedirectory(const pathchar_t *pathname);
MDBX_INTERNAL_FUNC int osal_is_pipe(mdbx_filehandle_t fd);
MDBX_INTERNAL_FUNC int osal_lockfile(mdbx_filehandle_t fd, bool wait);
#define MMAP_OPTION_TRUNCATE 1
#define MMAP_OPTION_SEMAPHORE 2
MDBX_INTERNAL_FUNC int mdbx_mmap(const int flags, mdbx_mmap_t *map,
const size_t must, const size_t limit,
const unsigned options);
MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map);
MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map, size_t size,
const size_t limit, const unsigned options);
MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map);
#define MDBX_MRESIZE_MAY_MOVE 0x00000100
#define MDBX_MRESIZE_MAY_UNMAP 0x00000200
MDBX_INTERNAL_FUNC int mdbx_mresize(const int flags, mdbx_mmap_t *map,
MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
size_t size, size_t limit);
#if defined(_WIN32) || defined(_WIN64)
typedef struct {
@ -434,17 +590,19 @@ typedef struct {
HANDLE handles[31];
} mdbx_handle_array_t;
MDBX_INTERNAL_FUNC int
mdbx_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array);
osal_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array);
MDBX_INTERNAL_FUNC int
mdbx_resume_threads_after_remap(mdbx_handle_array_t *array);
osal_resume_threads_after_remap(mdbx_handle_array_t *array);
#endif /* Windows */
MDBX_INTERNAL_FUNC int mdbx_msync(mdbx_mmap_t *map, size_t offset,
MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset,
size_t length,
enum mdbx_syncmode_bits mode_bits);
MDBX_INTERNAL_FUNC int mdbx_check_fs_rdonly(mdbx_filehandle_t handle,
const char *pathname, int err);
enum osal_syncmode_bits mode_bits);
MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle,
const pathchar_t *pathname,
int err);
MDBX_INTERNAL_FUNC int osal_check_fs_incore(mdbx_filehandle_t handle);
MDBX_MAYBE_UNUSED static __inline uint32_t mdbx_getpid(void) {
MDBX_MAYBE_UNUSED static __inline uint32_t osal_getpid(void) {
STATIC_ASSERT(sizeof(mdbx_pid_t) <= sizeof(uint32_t));
#if defined(_WIN32) || defined(_WIN64)
return GetCurrentProcessId();
@ -454,7 +612,7 @@ MDBX_MAYBE_UNUSED static __inline uint32_t mdbx_getpid(void) {
#endif
}
MDBX_MAYBE_UNUSED static __inline uintptr_t mdbx_thread_self(void) {
MDBX_MAYBE_UNUSED static __inline uintptr_t osal_thread_self(void) {
mdbx_tid_t thunk;
STATIC_ASSERT(sizeof(uintptr_t) >= sizeof(thunk));
#if defined(_WIN32) || defined(_WIN64)
@ -467,24 +625,30 @@ MDBX_MAYBE_UNUSED static __inline uintptr_t mdbx_thread_self(void) {
#if !defined(_WIN32) && !defined(_WIN64)
#if defined(__ANDROID_API__) || defined(ANDROID) || defined(BIONIC)
MDBX_INTERNAL_FUNC int mdbx_check_tid4bionic(void);
MDBX_INTERNAL_FUNC int osal_check_tid4bionic(void);
#else
static __inline int mdbx_check_tid4bionic(void) { return 0; }
static __inline int osal_check_tid4bionic(void) { return 0; }
#endif /* __ANDROID_API__ || ANDROID) || BIONIC */
MDBX_MAYBE_UNUSED static __inline int
mdbx_pthread_mutex_lock(pthread_mutex_t *mutex) {
int err = mdbx_check_tid4bionic();
osal_pthread_mutex_lock(pthread_mutex_t *mutex) {
int err = osal_check_tid4bionic();
return unlikely(err) ? err : pthread_mutex_lock(mutex);
}
#endif /* !Windows */
MDBX_INTERNAL_FUNC uint64_t mdbx_osal_monotime(void);
MDBX_INTERNAL_FUNC uint64_t
mdbx_osal_16dot16_to_monotime(uint32_t seconds_16dot16);
MDBX_INTERNAL_FUNC uint32_t mdbx_osal_monotime_to_16dot16(uint64_t monotime);
MDBX_INTERNAL_FUNC uint64_t osal_monotime(void);
MDBX_INTERNAL_FUNC uint64_t osal_cputime(size_t *optional_page_faults);
MDBX_INTERNAL_FUNC uint64_t osal_16dot16_to_monotime(uint32_t seconds_16dot16);
MDBX_INTERNAL_FUNC uint32_t osal_monotime_to_16dot16(uint64_t monotime);
MDBX_INTERNAL_FUNC bin128_t mdbx_osal_bootid(void);
/* Converts `monotime` via osal_monotime_to_16dot16(), but never lets a
 * non-zero input collapse to zero: when the conversion yields 0 the result
 * is 1 for a non-zero `monotime` and 0 for a zero one. */
MDBX_MAYBE_UNUSED static inline uint32_t
osal_monotime_to_16dot16_noUnderflow(uint64_t monotime) {
  const uint32_t converted = osal_monotime_to_16dot16(monotime);
  if (converted != 0)
    return converted;
  /* fix underflow */
  return (monotime != 0) ? 1 : 0;
}
MDBX_INTERNAL_FUNC bin128_t osal_bootid(void);
/*----------------------------------------------------------------------------*/
/* lck stuff */
@ -500,7 +664,7 @@ MDBX_INTERNAL_FUNC bin128_t mdbx_osal_bootid(void);
/// MUST NOT initialize shared synchronization objects in memory-mapped
/// LCK-file that are already in use.
/// \return Error code or zero on success.
MDBX_INTERNAL_FUNC int mdbx_lck_init(MDBX_env *env,
MDBX_INTERNAL_FUNC int osal_lck_init(MDBX_env *env,
MDBX_env *inprocess_neighbor,
int global_uniqueness_flag);
@ -521,7 +685,7 @@ MDBX_INTERNAL_FUNC int mdbx_lck_init(MDBX_env *env,
/// of other instances of MDBX_env within the current process, e.g.
/// restore POSIX-fcntl locks after the closing of file descriptors.
/// \return Error code (MDBX_PANIC) or zero on success.
MDBX_INTERNAL_FUNC int mdbx_lck_destroy(MDBX_env *env,
MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env,
MDBX_env *inprocess_neighbor);
/// \brief Connects to shared interprocess locking objects and tries to acquire
@ -529,14 +693,14 @@ MDBX_INTERNAL_FUNC int mdbx_lck_destroy(MDBX_env *env,
/// Depending on implementation or/and platform (Windows) this function may
/// acquire the non-OS super-level lock (e.g. for shared synchronization
/// objects initialization), which will be downgraded to OS-exclusive or
/// shared via explicit calling of mdbx_lck_downgrade().
/// shared via explicit calling of osal_lck_downgrade().
/// \return
/// MDBX_RESULT_TRUE (-1) - if an exclusive lock was acquired and thus
/// the current process is the first and only after the last use of DB.
/// MDBX_RESULT_FALSE (0) - if a shared lock was acquired and thus
/// DB has already been opened and now is used by other processes.
/// Otherwise (not 0 and not -1) - error code.
MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env);
MDBX_INTERNAL_FUNC int osal_lck_seize(MDBX_env *env);
/// \brief Downgrades the level of initially acquired lock to
/// operational level specified by argument. The reason for such downgrade:
@ -549,14 +713,14 @@ MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env);
/// (env->me_flags & MDBX_EXCLUSIVE) != 0 - downgrade to exclusive
/// operational lock.
/// \return Error code or zero on success
MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env);
MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env);
/// \brief Locks LCK-file or/and table of readers for (de)registering.
/// \return Error code or zero on success
MDBX_INTERNAL_FUNC int mdbx_rdt_lock(MDBX_env *env);
MDBX_INTERNAL_FUNC int osal_rdt_lock(MDBX_env *env);
/// \brief Unlocks LCK-file or/and table of readers after (de)registering.
MDBX_INTERNAL_FUNC void mdbx_rdt_unlock(MDBX_env *env);
MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env);
/// \brief Acquires lock for DB change (on writing transaction start)
/// Reading transactions will not be blocked.
@ -571,15 +735,15 @@ LIBMDBX_API void mdbx_txn_unlock(MDBX_env *env);
/// \brief Sets alive-flag of reader presence (indicative lock) for PID of
/// the current process. The function does no more than needed for
/// the correct working of mdbx_rpid_check() in other processes.
/// the correct working of osal_rpid_check() in other processes.
/// \return Error code or zero on success
MDBX_INTERNAL_FUNC int mdbx_rpid_set(MDBX_env *env);
MDBX_INTERNAL_FUNC int osal_rpid_set(MDBX_env *env);
/// \brief Resets alive-flag of reader presence (indicative lock)
/// for PID of the current process. The function does no more than needed
/// for the correct working of mdbx_rpid_check() in other processes.
/// for the correct working of osal_rpid_check() in other processes.
/// \return Error code or zero on success
MDBX_INTERNAL_FUNC int mdbx_rpid_clear(MDBX_env *env);
MDBX_INTERNAL_FUNC int osal_rpid_clear(MDBX_env *env);
/// \brief Checks for reading process status with the given pid with help of
/// alive-flag of presence (indicative lock) or using another way.
@ -589,28 +753,16 @@ MDBX_INTERNAL_FUNC int mdbx_rpid_clear(MDBX_env *env);
/// MDBX_RESULT_FALSE (0) - if the reader process with the given PID is absent
/// or not working with DB (indicative lock is not present).
/// Otherwise (not 0 and not -1) - error code.
MDBX_INTERNAL_FUNC int mdbx_rpid_check(MDBX_env *env, uint32_t pid);
MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid);
#if defined(_WIN32) || defined(_WIN64)
#define MUSTDIE_MB2WIDE(FROM, TO) \
do { \
const char *const from_tmp = (FROM); \
const size_t from_mblen = strlen(from_tmp); \
const size_t to_wlen = mdbx_mb2w(nullptr, 0, from_tmp, from_mblen); \
if (to_wlen < 1 || to_wlen > /* MAX_PATH */ INT16_MAX) \
return ERROR_INVALID_NAME; \
wchar_t *const to_tmp = _alloca((to_wlen + 1) * sizeof(wchar_t)); \
if (to_wlen + 1 != \
mdbx_mb2w(to_tmp, to_wlen + 1, from_tmp, from_mblen + 1)) \
return ERROR_INVALID_NAME; \
(TO) = to_tmp; \
} while (0)
MDBX_INTERNAL_FUNC int osal_mb2w(const char *const src, wchar_t **const pdst);
typedef void(WINAPI *MDBX_srwlock_function)(MDBX_srwlock *);
MDBX_INTERNAL_VAR MDBX_srwlock_function mdbx_srwlock_Init,
mdbx_srwlock_AcquireShared, mdbx_srwlock_ReleaseShared,
mdbx_srwlock_AcquireExclusive, mdbx_srwlock_ReleaseExclusive;
typedef void(WINAPI *osal_srwlock_t_function)(osal_srwlock_t *);
MDBX_INTERNAL_VAR osal_srwlock_t_function osal_srwlock_Init,
osal_srwlock_AcquireShared, osal_srwlock_ReleaseShared,
osal_srwlock_AcquireExclusive, osal_srwlock_ReleaseExclusive;
#if _WIN32_WINNT < 0x0600 /* prior to Windows Vista */
typedef enum _FILE_INFO_BY_HANDLE_CLASS {
@ -726,12 +878,57 @@ MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
NTSYSAPI ULONG RtlRandomEx(PULONG Seed);
typedef BOOL(WINAPI *MDBX_SetFileIoOverlappedRange)(HANDLE FileHandle,
PUCHAR OverlappedRangeStart,
ULONG Length);
MDBX_INTERNAL_VAR MDBX_SetFileIoOverlappedRange mdbx_SetFileIoOverlappedRange;
#endif /* Windows */
#endif /* !__cplusplus */
/*----------------------------------------------------------------------------*/
/* Reverses the byte order of a 64-bit value.
 * Uses a compiler builtin/intrinsic or a libc macro when one is available,
 * and falls back to a portable mask-and-shift expression otherwise. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static __always_inline uint64_t
osal_bswap64(uint64_t v) {
#if __GNUC_PREREQ(4, 4) || __CLANG_PREREQ(4, 0) ||                            \
    __has_builtin(__builtin_bswap64)
  return __builtin_bswap64(v);
#elif defined(_MSC_VER) && !defined(__clang__)
  return _byteswap_uint64(v);
#elif defined(__bswap_64)
  return __bswap_64(v);
#elif defined(bswap_64)
  return bswap_64(v);
#else
  /* isolate each byte first, then move it to its mirrored position */
  return ((v & UINT64_C(0x00000000000000ff)) << 56) |
         ((v & UINT64_C(0x000000000000ff00)) << 40) |
         ((v & UINT64_C(0x0000000000ff0000)) << 24) |
         ((v & UINT64_C(0x00000000ff000000)) << 8) |
         ((v & UINT64_C(0x000000ff00000000)) >> 8) |
         ((v & UINT64_C(0x0000ff0000000000)) >> 24) |
         ((v & UINT64_C(0x00ff000000000000)) >> 40) |
         ((v & UINT64_C(0xff00000000000000)) >> 56);
#endif
}
/* Reverses the byte order of a 32-bit value.
 * Uses a compiler builtin/intrinsic or a libc macro when one is available,
 * and falls back to a portable mask-and-shift expression otherwise. */
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static __always_inline uint32_t
osal_bswap32(uint32_t v) {
#if __GNUC_PREREQ(4, 4) || __CLANG_PREREQ(4, 0) ||                            \
    __has_builtin(__builtin_bswap32)
  return __builtin_bswap32(v);
#elif defined(_MSC_VER) && !defined(__clang__)
  return _byteswap_ulong(v);
#elif defined(__bswap_32)
  return __bswap_32(v);
#elif defined(bswap_32)
  return bswap_32(v);
#else
  /* isolate each byte first, then move it to its mirrored position */
  return ((v & UINT32_C(0x000000ff)) << 24) |
         ((v & UINT32_C(0x0000ff00)) << 8) |
         ((v & UINT32_C(0x00ff0000)) >> 8) |
         ((v & UINT32_C(0xff000000)) >> 24);
#endif
}
/*----------------------------------------------------------------------------*/
#if defined(_MSC_VER) && _MSC_VER >= 1900
/* LY: MSVC 2015/2017/2019 has buggy/inconsistent PRIuPTR/PRIxPTR macros
* for internal format-args checker. */

View File

@ -2,32 +2,32 @@ enable_language(CXX)
include(../cmake/compiler.cmake)
set(LIBMDBX_TEST_SOURCES
base.h
cases.cc
chrono.cc
chrono.h
config.cc
config.h
copy.cc
dead.cc
hill.cc
jitter.cc
keygen.cc
keygen.h
log.cc
log.h
main.cc
osal.h
osal-unix.cc
osal-windows.cc
test.cc
test.h
try.cc
utils.cc
utils.h
append.cc
ttl.cc
nested.cc
base.h++
cases.c++
chrono.c++
chrono.h++
config.c++
config.h++
copy.c++
dead.c++
hill.c++
jitter.c++
keygen.c++
keygen.h++
log.c++
log.h++
main.c++
osal.h++
osal-unix.c++
osal-windows.c++
test.c++
test.h++
try.c++
utils.c++
utils.h++
append.c++
ttl.c++
nested.c++
)
if(NOT MDBX_BUILD_CXX)
@ -92,11 +92,13 @@ else()
set_tests_properties(smoke_chk PROPERTIES
DEPENDS smoke
TIMEOUT 60
FAIL_REGULAR_EXPRESSION "cooperative mode"
REQUIRED_FILES smoke.db)
add_test(NAME smoke_chk_copy COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvv smoke.db-copy)
set_tests_properties(smoke_chk_copy PROPERTIES
DEPENDS smoke
TIMEOUT 60
FAIL_REGULAR_EXPRESSION "cooperative mode"
REQUIRED_FILES smoke.db-copy)
endif()
@ -109,15 +111,16 @@ else()
TIMEOUT 600
RUN_SERIAL OFF)
if(MDBX_BUILD_TOOLS)
add_test(NAME dupsort_writemap_chk COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvv dupsort_writemap.db)
add_test(NAME dupsort_writemap_chk COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvvwc dupsort_writemap.db)
set_tests_properties(dupsort_writemap_chk PROPERTIES
DEPENDS dupsort_writemap
TIMEOUT 60
REQUIRED_FILES dupsort_writemap.db)
add_test(NAME dupsort_writemap_chk_copy COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvv dupsort_writemap.db-copy)
add_test(NAME dupsort_writemap_chk_copy COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvvc dupsort_writemap.db-copy)
set_tests_properties(dupsort_writemap_chk_copy PROPERTIES
DEPENDS dupsort_writemap
TIMEOUT 60
FAIL_REGULAR_EXPRESSION "monopolistic mode"
REQUIRED_FILES dupsort_writemap.db-copy)
endif()
@ -128,15 +131,17 @@ else()
TIMEOUT 1800
RUN_SERIAL OFF)
if(MDBX_BUILD_TOOLS)
add_test(NAME uniq_nested_chk COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvv uniq_nested.db)
add_test(NAME uniq_nested_chk COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvvw uniq_nested.db)
set_tests_properties(uniq_nested_chk PROPERTIES
DEPENDS uniq_nested
TIMEOUT 60
FAIL_REGULAR_EXPRESSION "cooperative mode"
REQUIRED_FILES uniq_nested.db)
add_test(NAME uniq_nested_chk_copy COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvv uniq_nested.db-copy)
set_tests_properties(uniq_nested_chk_copy PROPERTIES
DEPENDS uniq_nested
TIMEOUT 60
FAIL_REGULAR_EXPRESSION "cooperative mode"
REQUIRED_FILES uniq_nested.db-copy)
endif()

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -12,7 +12,7 @@
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include "test.h++"
class testcase_append : public testcase {
public:
@ -21,7 +21,14 @@ public:
bool run() override;
static bool review_params(actor_params &params) {
return testcase::review_params(params) && params.make_keygen_linear();
if (!testcase::review_params(params))
return false;
const bool ordered = !flipcoin_x3();
log_notice("the '%s' key-generation mode is selected",
ordered ? "ordered/linear" : "unordered/non-linear");
if (ordered && !params.make_keygen_linear())
return false;
return true;
}
};
REGISTER_TESTCASE(append);
@ -133,8 +140,6 @@ bool testcase_append::run() {
}
} else
failure_perror("mdbx_get_equal_or_great()", err);
assert(!expect_key_mismatch);
}
err = mdbx_cursor_put(cursor_guard.get(), &key->value, &data->value, flags);
@ -148,12 +153,25 @@ bool testcase_append::run() {
if (!expect_key_mismatch) {
if (unlikely(err != MDBX_SUCCESS))
failure_perror("mdbx_cursor_put(insert-a)", err);
failure_perror("mdbx_cursor_put(append)", err);
++inserted_number;
inserted_checksum.push((uint32_t)inserted_number, key->value);
inserted_checksum.push(10639, data->value);
if (config.params.speculum) {
Item item(iov2dataview(key), iov2dataview(data));
const auto insertion_result = speculum.insert(item);
if (!insertion_result.second) {
char dump_key[32], dump_value[32];
log_error(
"speculum.append: unexpected %s {%s, %s}", "MDBX_SUCCESS",
mdbx_dump_val(&key->value, dump_key, sizeof(dump_key)),
mdbx_dump_val(&data->value, dump_value, sizeof(dump_value)));
return false;
}
}
} else if (unlikely(err != MDBX_EKEYMISMATCH))
failure_perror("mdbx_cursor_put(insert-a) != MDBX_EKEYMISMATCH", err);
failure_perror("mdbx_cursor_put(append) != MDBX_EKEYMISMATCH", err);
if (++txn_nops >= config.params.batch_write) {
err = breakable_restart();
@ -166,6 +184,10 @@ bool testcase_append::run() {
committed_inserted_number = inserted_number;
committed_inserted_checksum = inserted_checksum;
txn_nops = 0;
if (!speculum_verify()) {
log_notice("append: bailout breakable_restart");
return false;
}
}
report(1);
@ -181,6 +203,10 @@ bool testcase_append::run() {
}
//----------------------------------------------------------------------------
txn_begin(true);
if (!speculum_verify()) {
log_notice("append: bailout verify");
return false;
}
cursor_renew();
MDBX_val check_key, check_data;
@ -209,7 +235,8 @@ bool testcase_append::run() {
failure("read_count(%" PRIu64 ") != inserted_number(%" PRIu64 ")",
read_count, inserted_number);
if (unlikely(read_checksum.value != inserted_checksum.value))
if (unlikely(read_checksum.value != inserted_checksum.value) &&
!keyvalue_maker.is_unordered())
failure("read_checksum(0x%016" PRIu64 ") "
"!= inserted_checksum(0x%016" PRIu64 ")",
read_checksum.value, inserted_checksum.value);

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -30,6 +30,10 @@
#define _WIN32_WINNT 0x0601 /* Windows 7 */
#endif
#ifdef _MSC_VER
/* Workaround for the MSVC headers' `extern "C"` vs `std::` redefinition bug */
#if defined(__SANITIZE_ADDRESS__) && !defined(_DISABLE_VECTOR_ANNOTATION)
#define _DISABLE_VECTOR_ANNOTATION
#endif /* _DISABLE_VECTOR_ANNOTATION */
#ifndef _CRT_SECURE_NO_WARNINGS
#define _CRT_SECURE_NO_WARNINGS
#endif /* _CRT_SECURE_NO_WARNINGS */
@ -94,7 +98,7 @@
#define MDBX_INTERNAL_FUNC
#define MDBX_INTERNAL_VAR extern
#define xMDBX_TOOLS /* Avoid using internal mdbx_assert() */
#define xMDBX_TOOLS /* Avoid using internal eASSERT() */
#include "../mdbx.h++"
#include "../src/base.h"
#include "../src/osal.h"

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -12,7 +12,7 @@
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include "test.h++"
registry *registry::instance() {
static registry *singleton;

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -12,7 +12,7 @@
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include "test.h++"
namespace chrono {
@ -87,10 +87,11 @@ time from_ms(uint64_t ms) {
time now_realtime() {
#if defined(_WIN32) || defined(_WIN64) || defined(_WINDOWS)
static void(WINAPI * query_time)(LPFILETIME);
if (!query_time) {
if (unlikely(!query_time)) {
HMODULE hModule = GetModuleHandle(TEXT("kernel32.dll"));
if (hModule)
query_time = (void(WINAPI *)(LPFILETIME))GetProcAddress(
GetModuleHandle(TEXT("kernel32.dll")),
"GetSystemTimePreciseAsFileTime");
hModule, "GetSystemTimePreciseAsFileTime");
if (!query_time)
query_time = GetSystemTimeAsFileTime;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -14,8 +14,8 @@
#pragma once
#include "base.h"
#include "utils.h"
#include "base.h++"
#include "utils.h++"
namespace chrono {

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -12,7 +12,7 @@
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include "test.h++"
#if defined(_MSC_VER) && !defined(strcasecmp)
#define strcasecmp(str, len) _stricmp(str, len)
@ -369,7 +369,6 @@ const struct option_verb mode_bits[] = {
{"notls", unsigned(MDBX_NOTLS)},
{"nordahead", unsigned(MDBX_NORDAHEAD)},
{"nomeminit", unsigned(MDBX_NOMEMINIT)},
{"coalesce", unsigned(MDBX_COALESCE)},
{"lifo", unsigned(MDBX_LIFORECLAIM)},
{"perturb", unsigned(MDBX_PAGEPERTURB)},
{"accede", unsigned(MDBX_ACCEDE)},

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -14,9 +14,9 @@
#pragma once
#include "base.h"
#include "log.h"
#include "utils.h"
#include "base.h++"
#include "log.h++"
#include "utils.h++"
#define ACTOR_ID_MAX INT16_MAX

View File

@ -1,4 +1,4 @@
#include "test.h"
#include "test.h++"
class testcase_copy : public testcase {
const std::string copy_pathname;
@ -15,7 +15,7 @@ REGISTER_TESTCASE(copy);
void testcase_copy::copy_db(const bool with_compaction) {
int err = mdbx_env_delete(copy_pathname.c_str(), MDBX_ENV_JUST_DELETE);
if (err != MDBX_SUCCESS && err != MDBX_RESULT_TRUE)
failure_perror("mdbx_removefile()", err);
failure_perror("osal_removefile()", err);
err = mdbx_env_copy(db_guard.get(), copy_pathname.c_str(),
with_compaction ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS);

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -12,7 +12,7 @@
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include "test.h++"
class testcase_deadread : public testcase {
public:

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -12,7 +12,7 @@
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include "test.h++"
/* LY: тест "холмиком":
* - сначала наполняем таблицу циклическими CRUD-манипуляциями,

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -12,7 +12,7 @@
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include "test.h++"
class testcase_jitter : public testcase {
protected:

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -12,7 +12,7 @@
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include "test.h++"
namespace keygen {
@ -227,7 +227,8 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id,
(void)thread_number;
mapping = actor.keygen;
salt = (actor.keygen.seed + actor_id) * UINT64_C(14653293970879851569);
salt =
(actor.keygen.seed + uint64_t(actor_id)) * UINT64_C(14653293970879851569);
base = actor.serial_base();
}
@ -315,11 +316,12 @@ void __hot maker::mk_begin(const serial_t serial, const essentials &params,
out.value.iov_len = std::max(unsigned(params.minlen), length(serial));
const auto variation = params.maxlen - params.minlen;
if (variation) {
if (serial % (variation + 1)) {
if (serial % (variation + serial_t(1))) {
auto refix = serial * UINT64_C(48835288005252737);
refix ^= refix >> 32;
out.value.iov_len = std::max(
out.value.iov_len, params.minlen + 1 + size_t(refix) % variation);
out.value.iov_len =
std::max(out.value.iov_len,
params.minlen + size_t(1) + size_t(refix) % variation);
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -14,10 +14,10 @@
#pragma once
#include "base.h"
#include "config.h"
#include "log.h"
#include "utils.h"
#include "base.h++"
#include "config.h++"
#include "log.h++"
#include "utils.h++"
namespace keygen {

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -12,7 +12,7 @@
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include "test.h++"
static void fflushall() { fflush(nullptr); }
@ -61,7 +61,8 @@ static FILE *last;
void setlevel(loglevel priority) {
level = priority;
int rc = mdbx_setup_debug(MDBX_log_level_t(priority),
MDBX_DBG_ASSERT | MDBX_DBG_AUDIT | MDBX_DBG_JITTER,
MDBX_DBG_ASSERT | MDBX_DBG_AUDIT | MDBX_DBG_JITTER |
MDBX_DBG_DUMP,
mdbx_logger);
log_trace("set mdbx debug-opts: 0x%02x", rc);
}
@ -141,7 +142,7 @@ void output_nocheckloglevel_ap(const logging::loglevel priority,
prefix.c_str(), level2str(priority), suffix.c_str());
va_list ones;
memset(&ones, 0, sizeof(ones)) /* zap MSVC and other stupid compilers */;
memset(&ones, 0, sizeof(ones)) /* zap MSVC and other goofy compilers */;
if (same_or_higher(priority, error))
va_copy(ones, ap);
vfprintf(last, format, ap);
@ -152,11 +153,11 @@ void output_nocheckloglevel_ap(const logging::loglevel priority,
switch (end) {
default:
putc('\n', last);
// fall through
MDBX_CXX17_FALLTHROUGH; // fall through
case '\n':
fflush(last);
last = nullptr;
// fall through
MDBX_CXX17_FALLTHROUGH; // fall through
case ' ':
case '_':
case ':':

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -14,8 +14,8 @@
#pragma once
#include "base.h"
#include "chrono.h"
#include "base.h++"
#include "chrono.h++"
MDBX_NORETURN void usage(void);
MDBX_NORETURN void MDBX_PRINTF_ARGS(1, 2) failure(const char *fmt, ...);

View File

@ -12,6 +12,7 @@ UNAME="$(uname -s 2>/dev/null || echo Unknown)"
DB_UPTO_MB=17408
PAGESIZE=min
DONT_CHECK_RAM=no
EXTRA=no
while [ -n "$1" ]
do
@ -31,8 +32,9 @@ do
echo "--dir PATH Specifies directory for test DB and other files (it will be cleared)"
echo "--db-upto-mb NN Limits upper size of test DB to the NN megabytes"
echo "--no-geometry-jitter Disable jitter for geometry upper-size"
echo "--pagesize NN Use specified page size (256 is minimal and used by default) "
echo "--dont-check-ram-size Don't check available RAM "
echo "--pagesize NN Use specified page size (256 is minimal and used by default)"
echo "--dont-check-ram-size Don't check available RAM"
echo "--extra Iterate extra modes/flags"
echo "--help Print this usage help and exit"
exit -2
;;
@ -136,7 +138,7 @@ do
PAGESIZE=$((1024*64))
;;
*)
echo "Invalig page size '$2'"
echo "Invalid page size '$2'"
exit -2
;;
esac
@ -145,6 +147,9 @@ do
--dont-check-ram-size)
DONT_CHECK_RAM=yes
;;
--extra)
EXTRA=yes
;;
*)
echo "Unknown option '$1'"
exit -2
@ -350,9 +355,12 @@ else
}
fi
syncmodes=("" ,+nosync-safe ,+nosync-utterly)
options=(writemap lifo notls perturb)
if [ "$EXTRA" != "no" ]; then
options=(writemap lifo notls perturb nomeminit nordahead)
else
options=(writemap lifo notls)
fi
syncmodes=("" ,+nosync-safe ,+nosync-utterly ,+nometasync)
function join { local IFS="$1"; shift; echo "$*"; }
function bits2options {
@ -414,65 +422,89 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10
split=30
caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
split=24
caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
split=16
caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
if [ "$EXTRA" != "no" ]; then
split=10
caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
fi
split=4
caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \
--pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%3]} \
--nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \
--keygen.seed=${seed}
done # options
loop=$((loop + 1))

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -12,7 +12,7 @@
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include "test.h++"
#if !(defined(_WIN32) || defined(_WIN64))
#include <sys/resource.h>
@ -98,7 +98,6 @@ MDBX_NORETURN void usage(void) {
" accede == MDBX_ACCEDE\n"
" nometasync == MDBX_NOMETASYNC\n"
" lifo == MDBX_LIFORECLAIM\n"
" coalesce == MDBX_COALESCE\n"
" nosync-safe == MDBX_SAFE_NOSYNC\n"
" writemap == MDBX_WRITEMAP\n"
" nosync-utterly == MDBX_UTTERLY_NOSYNC\n"
@ -130,8 +129,7 @@ void actor_params::set_defaults(const std::string &tmpdir) {
#endif
pathname_db = tmpdir + "mdbx-test.db";
mode_flags = MDBX_NOSUBDIR | MDBX_WRITEMAP | MDBX_SAFE_NOSYNC |
MDBX_NOMEMINIT | MDBX_COALESCE | MDBX_LIFORECLAIM | MDBX_ACCEDE;
mode_flags = MDBX_NOSUBDIR | MDBX_WRITEMAP | MDBX_SYNC_DURABLE | MDBX_ACCEDE;
table_flags = MDBX_DUPSORT;
size_lower = -1;
@ -682,9 +680,9 @@ int main(int argc, char *const argv[]) {
if (!actor)
continue;
log_verbose("actor #%u, id %d, pid %ld: %s\n", actor->actor_id,
actor->space_id, (long)pid, status2str(status));
if (status > as_running) {
log_notice("actor #%u, id %d, pid %ld: %s\n", actor->actor_id,
actor->space_id, (long)pid, status2str(status));
left -= 1;
if (status != as_successful) {
if (global::config::failfast && !failed) {
@ -694,6 +692,9 @@ int main(int argc, char *const argv[]) {
}
failed = true;
}
} else {
log_verbose("actor #%u, id %d, pid %ld: %s\n", actor->actor_id,
actor->space_id, (long)pid, status2str(status));
}
} else {
if (timeout_seconds_left == 0)

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -12,7 +12,7 @@
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include "test.h++"
#include <cmath>
/* LY: тест "эмуляцией time-to-live" с вложенными транзакциями:

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -12,7 +12,7 @@
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include "test.h++"
#if !(defined(_WIN32) || defined(_WIN64))

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -12,7 +12,7 @@
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include "test.h++"
#if defined(_WIN32) || defined(_WIN64)
@ -71,7 +71,7 @@ void osal_setup(const std::vector<actor_config> &actors) {
events.reserve(n);
for (unsigned i = 0; i < n; ++i) {
HANDLE hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
HANDLE hEvent = CreateEventW(NULL, TRUE, FALSE, NULL);
if (!hEvent)
failure_perror("CreateEvent()", GetLastError());
hEvent = make_inheritable(hEvent);
@ -79,22 +79,22 @@ void osal_setup(const std::vector<actor_config> &actors) {
events[i] = hEvent;
}
hBarrierSemaphore = CreateSemaphore(NULL, 0, (LONG)actors.size(), NULL);
hBarrierSemaphore = CreateSemaphoreW(NULL, 0, (LONG)actors.size(), NULL);
if (!hBarrierSemaphore)
failure_perror("CreateSemaphore(BarrierSemaphore)", GetLastError());
hBarrierSemaphore = make_inheritable(hBarrierSemaphore);
hBarrierEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
hBarrierEvent = CreateEventW(NULL, TRUE, FALSE, NULL);
if (!hBarrierEvent)
failure_perror("CreateEvent(BarrierEvent)", GetLastError());
hBarrierEvent = make_inheritable(hBarrierEvent);
hProgressActiveEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
hProgressActiveEvent = CreateEventW(NULL, FALSE, FALSE, NULL);
if (!hProgressActiveEvent)
failure_perror("CreateEvent(ProgressActiveEvent)", GetLastError());
hProgressActiveEvent = make_inheritable(hProgressActiveEvent);
hProgressPassiveEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
hProgressPassiveEvent = CreateEventW(NULL, FALSE, FALSE, NULL);
if (!hProgressPassiveEvent)
failure_perror("CreateEvent(ProgressPassiveEvent)", GetLastError());
hProgressPassiveEvent = make_inheritable(hProgressPassiveEvent);
@ -248,7 +248,7 @@ Environment:
CommandLine.push_back('"');
for (auto It = Argument.begin();; ++It) {
unsigned NumberBackslashes = 0;
size_t NumberBackslashes = 0;
while (It != Argument.end() && *It == '\\') {
++It;
@ -348,6 +348,7 @@ actor_status osal_actor_info(const mdbx_pid_t pid) {
status = as_debugging;
break;
case STATUS_CONTROL_C_EXIT:
case /* STATUS_INTERRUPTED */ 0xC0000515L:
status = as_killed;
break;
case EXCEPTION_ACCESS_VIOLATION:
@ -357,6 +358,12 @@ actor_status osal_actor_info(const mdbx_pid_t pid) {
case EXCEPTION_INVALID_DISPOSITION:
case EXCEPTION_ILLEGAL_INSTRUCTION:
case EXCEPTION_NONCONTINUABLE_EXCEPTION:
case /* STATUS_STACK_BUFFER_OVERRUN, STATUS_BUFFER_OVERFLOW_PREVENTED */
0xC0000409L:
case /* STATUS_ASSERTION_FAILURE */ 0xC0000420L:
case /* STATUS_HEAP_CORRUPTION */ 0xC0000374L:
case /* STATUS_CONTROL_STACK_VIOLATION */ 0xC00001B2L:
log_error("pid %zu, exception 0x%x", (intptr_t)pid, (unsigned)ExitCode);
status = as_coredump;
break;
default:
@ -428,7 +435,7 @@ void osal_udelay(size_t us) {
unsigned timeslice_ms = 1;
while (timeBeginPeriod(timeslice_ms) == TIMERR_NOCANDO)
++timeslice_ms;
threshold_us = timeslice_ms * 1500u;
threshold_us = timeslice_ms * size_t(1500);
assert(threshold_us > 0);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -14,7 +14,7 @@
#pragma once
#include "base.h"
#include "base.h++"
void osal_setup(const std::vector<actor_config> &actors);
void osal_broadcast(unsigned id);

View File

@ -1,5 +1,5 @@
/*
* Copyright 2016-2022 Leonid Yuriev <leo@yuriev.ru>.
* Copyright 2016-2023 Leonid Yuriev <leo@yuriev.ru>.
* Copyright 2015 Vladimir Romanov
* <https://www.linkedin.com/in/vladimirromanov>, Yota Lab.
*

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -12,7 +12,7 @@
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include "test.h++"
const char *testcase2str(const actor_testcase testcase) {
switch (testcase) {
@ -100,7 +100,7 @@ int testcase::hsr_callback(const MDBX_env *env, const MDBX_txn *txn,
info.mi_geo.current >= info.mi_geo.upper)) {
osal_yield();
if (retry > 0)
osal_udelay(retry * 100);
osal_udelay(retry * size_t(100));
return MDBX_RESULT_FALSE /* retry / wait until reader done */;
}
@ -158,12 +158,17 @@ void testcase::db_open() {
if (config.params.random_writemap && flipcoin())
mode ^= MDBX_WRITEMAP;
actual_env_mode = mode;
int rc = mdbx_env_open(db_guard.get(), config.params.pathname_db.c_str(),
mode, 0640);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_env_open()", rc);
unsigned env_flags_proxy;
rc = mdbx_env_get_flags(db_guard.get(), &env_flags_proxy);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_env_get_flags()", rc);
actual_env_mode = MDBX_env_flags_t(env_flags_proxy);
rc = mdbx_env_set_syncperiod(db_guard.get(), unsigned(0.042 * 65536));
if (unlikely(rc != MDBX_SUCCESS) && rc != MDBX_BUSY)
failure_perror("mdbx_env_set_syncperiod()", rc);
@ -199,6 +204,19 @@ void testcase::txn_begin(bool readonly, MDBX_txn_flags_t flags) {
log_trace("<< txn_begin(%s, 0x%04X)", readonly ? "read-only" : "read-write",
flags);
if (flipcoin_n(5)) {
const unsigned mask =
unsigned(MDBX_warmup_default | MDBX_warmup_force | MDBX_warmup_oomsafe |
MDBX_warmup_lock | MDBX_warmup_touchlimit);
static unsigned counter;
MDBX_warmup_flags_t warmup_flags = MDBX_warmup_flags_t(
(counter > MDBX_warmup_release) ? prng64() & mask : counter);
counter += 1;
int err = mdbx_env_warmup(db_guard.get(), txn, warmup_flags, 0);
log_trace("== counter %u, env_warmup(flags %u), rc %d", counter,
warmup_flags, err);
}
}
int testcase::breakable_commit() {

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -14,13 +14,13 @@
#pragma once
#include "base.h"
#include "chrono.h"
#include "config.h"
#include "keygen.h"
#include "log.h"
#include "osal.h"
#include "utils.h"
#include "base.h++"
#include "chrono.h++"
#include "config.h++"
#include "keygen.h++"
#include "log.h++"
#include "osal.h++"
#include "utils.h++"
#include <deque>
#include <set>
@ -101,10 +101,10 @@ class testcase;
class registry {
struct record {
actor_testcase id;
actor_testcase id = ac_none;
std::string name;
bool (*review_params)(actor_params &);
testcase *(*constructor)(const actor_config &, const mdbx_pid_t);
bool (*review_params)(actor_params &) = nullptr;
testcase *(*constructor)(const actor_config &, const mdbx_pid_t) = nullptr;
};
std::unordered_map<std::string, const record *> name2id;
std::unordered_map<int, const record *> id2record;

View File

@ -1,4 +1,4 @@
#include "test.h"
#include "test.h++"
class testcase_try : public testcase {
public:

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -12,7 +12,7 @@
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include "test.h++"
#include <cmath>
#include <deque>

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -12,7 +12,7 @@
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include "test.h++"
#include <float.h>
#if defined(HAVE_IEEE754_H) || __has_include(<ieee754.h>)
#include <ieee754.h>

View File

@ -1,5 +1,5 @@
/*
* Copyright 2017-2022 Leonid Yuriev <leo@yuriev.ru>
* Copyright 2017-2023 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
@ -13,7 +13,7 @@
*/
#pragma once
#include "base.h"
#include "base.h++"
#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) || \
!defined(__ORDER_BIG_ENDIAN__)

View File

@ -4,7 +4,7 @@
msync(start)
fun:msync
...
fun:mdbx_sync_locked*
fun:sync_locked*
}
{
msync-whole-mmap-2
@ -12,7 +12,7 @@
msync(start)
fun:msync
...
fun:mdbx_env_sync_internal*
fun:env_sync*
}
{
msync-whole-mmap-3
@ -20,7 +20,7 @@
msync(start)
fun:msync
...
fun:mdbx_mapresize*
fun:map_resize*
}
{
msync-wipe-steady
@ -28,21 +28,43 @@
msync(start)
fun:msync
...
fun:mdbx_wipe_steady*
fun:wipe_steady*
}
# Suppresses the Memcheck "Param" report for msync(start) when msync is
# reached, through any number of intermediate frames, from a function whose
# name matches meta_sync*.
{
msync-meta
Memcheck:Param
msync(start)
fun:msync
...
fun:meta_sync*
}
# Suppresses the Memcheck "Param" report for msync(start) when msync is
# reached, through any number of intermediate frames, from a function whose
# name matches txn_spill*.
{
msync-spill
Memcheck:Param
msync(start)
fun:msync
...
fun:txn_spill*
}
# memcmp() inside mdbx_iov_write() as workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269
# memcmp() inside iov_write() as workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269
{
write-page-check-bcmp
iov-pagecheck-1
Memcheck:Cond
fun:bcmp
fun:mdbx_iov_write*
fun:iov_callback4dirtypages
fun:osal_ioring_walk
fun:iov_complete
fun:iov_write
}
{
write-page-check-memcmp
iov-pagecheck-2
Memcheck:Cond
fun:memcmp*
fun:mdbx_iov_write*
fun:iov_callback4dirtypages
fun:osal_ioring_walk
fun:iov_complete
fun:iov_write
}
# single-page flush by pwrite()
@ -52,7 +74,7 @@
pwrite(buf)
fun:pwrite
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwrite64-page-flush
@ -60,7 +82,7 @@
pwrite64(buf)
fun:pwrite
...
fun:mdbx_iov_write*
fun:iov_write*
}
# modern Valgrind doesn't support the `vector[...]` pattern
@ -70,16 +92,16 @@
# pwritev(vector[...])
# fun:pwritev
# ...
# fun:mdbx_iov_write*
# fun:iov_write*
#}
# for((i=0;i<64;++i)); do echo -e "{\n pwritev-page-flush-$i\n Memcheck:Param\n pwritev(vector[$i])\n fun:pwritev\n ...\n fun:mdbx_iov_write*\n}"; done >> valgrind_suppress.txt
# for((i=0;i<64;++i)); do echo -e "{\n pwritev-page-flush-$i\n Memcheck:Param\n pwritev(vector[$i])\n fun:pwritev\n ...\n fun:iov_write*\n}"; done >> valgrind_suppress.txt
{
pwritev-page-flush-0
Memcheck:Param
pwritev(vector[0])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-1
@ -87,7 +109,7 @@
pwritev(vector[1])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-2
@ -95,7 +117,7 @@
pwritev(vector[2])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-3
@ -103,7 +125,7 @@
pwritev(vector[3])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-4
@ -111,7 +133,7 @@
pwritev(vector[4])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-5
@ -119,7 +141,7 @@
pwritev(vector[5])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-6
@ -127,7 +149,7 @@
pwritev(vector[6])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-7
@ -135,7 +157,7 @@
pwritev(vector[7])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-8
@ -143,7 +165,7 @@
pwritev(vector[8])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-9
@ -151,7 +173,7 @@
pwritev(vector[9])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-10
@ -159,7 +181,7 @@
pwritev(vector[10])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-11
@ -167,7 +189,7 @@
pwritev(vector[11])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-12
@ -175,7 +197,7 @@
pwritev(vector[12])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-13
@ -183,7 +205,7 @@
pwritev(vector[13])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-14
@ -191,7 +213,7 @@
pwritev(vector[14])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-15
@ -199,7 +221,7 @@
pwritev(vector[15])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-16
@ -207,7 +229,7 @@
pwritev(vector[16])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-17
@ -215,7 +237,7 @@
pwritev(vector[17])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-18
@ -223,7 +245,7 @@
pwritev(vector[18])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-19
@ -231,7 +253,7 @@
pwritev(vector[19])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-20
@ -239,7 +261,7 @@
pwritev(vector[20])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-21
@ -247,7 +269,7 @@
pwritev(vector[21])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-22
@ -255,7 +277,7 @@
pwritev(vector[22])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-23
@ -263,7 +285,7 @@
pwritev(vector[23])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-24
@ -271,7 +293,7 @@
pwritev(vector[24])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-25
@ -279,7 +301,7 @@
pwritev(vector[25])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-26
@ -287,7 +309,7 @@
pwritev(vector[26])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-27
@ -295,7 +317,7 @@
pwritev(vector[27])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-28
@ -303,7 +325,7 @@
pwritev(vector[28])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-29
@ -311,7 +333,7 @@
pwritev(vector[29])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-30
@ -319,7 +341,7 @@
pwritev(vector[30])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-31
@ -327,7 +349,7 @@
pwritev(vector[31])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-32
@ -335,7 +357,7 @@
pwritev(vector[32])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-33
@ -343,7 +365,7 @@
pwritev(vector[33])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-34
@ -351,7 +373,7 @@
pwritev(vector[34])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-35
@ -359,7 +381,7 @@
pwritev(vector[35])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-36
@ -367,7 +389,7 @@
pwritev(vector[36])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-37
@ -375,7 +397,7 @@
pwritev(vector[37])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-38
@ -383,7 +405,7 @@
pwritev(vector[38])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-39
@ -391,7 +413,7 @@
pwritev(vector[39])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-40
@ -399,7 +421,7 @@
pwritev(vector[40])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-41
@ -407,7 +429,7 @@
pwritev(vector[41])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-42
@ -415,7 +437,7 @@
pwritev(vector[42])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-43
@ -423,7 +445,7 @@
pwritev(vector[43])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-44
@ -431,7 +453,7 @@
pwritev(vector[44])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-45
@ -439,7 +461,7 @@
pwritev(vector[45])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-46
@ -447,7 +469,7 @@
pwritev(vector[46])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-47
@ -455,7 +477,7 @@
pwritev(vector[47])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-48
@ -463,7 +485,7 @@
pwritev(vector[48])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-49
@ -471,7 +493,7 @@
pwritev(vector[49])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-50
@ -479,7 +501,7 @@
pwritev(vector[50])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-51
@ -487,7 +509,7 @@
pwritev(vector[51])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-52
@ -495,7 +517,7 @@
pwritev(vector[52])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-53
@ -503,7 +525,7 @@
pwritev(vector[53])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-54
@ -511,7 +533,7 @@
pwritev(vector[54])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-55
@ -519,7 +541,7 @@
pwritev(vector[55])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-56
@ -527,7 +549,7 @@
pwritev(vector[56])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-57
@ -535,7 +557,7 @@
pwritev(vector[57])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-58
@ -543,7 +565,7 @@
pwritev(vector[58])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-59
@ -551,7 +573,7 @@
pwritev(vector[59])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-60
@ -559,7 +581,7 @@
pwritev(vector[60])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-61
@ -567,7 +589,7 @@
pwritev(vector[61])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-62
@ -575,7 +597,7 @@
pwritev(vector[62])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}
{
pwritev-page-flush-63
@ -583,5 +605,5 @@
pwritev(vector[63])
fun:pwritev
...
fun:mdbx_iov_write*
fun:iov_write*
}