mirror of
https://github.com/isar/libmdbx.git
synced 2025-12-16 17:12:23 +08:00
Compare commits
58 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9d7c5243d2 | ||
|
|
5b740913d0 | ||
|
|
2f33449f2a | ||
|
|
aac890314c | ||
|
|
cfb9a55ebc | ||
|
|
cdbf2ed856 | ||
|
|
f2f2cc3b40 | ||
|
|
3f8dad1ede | ||
|
|
a17e041830 | ||
|
|
207fc11d76 | ||
|
|
3813333b28 | ||
|
|
7628369819 | ||
|
|
3bec0dbc6e | ||
|
|
c07cfd30e1 | ||
|
|
ffb822cb61 | ||
|
|
7f8e3c8781 | ||
|
|
916e6e817d | ||
|
|
bf3f9be98a | ||
|
|
7b112df36e | ||
|
|
4073330ad7 | ||
|
|
f525e4d292 | ||
|
|
baf3eb267f | ||
|
|
0b24446e8e | ||
|
|
8bd2ae9f20 | ||
|
|
f488d84dc7 | ||
|
|
f695a1b48e | ||
|
|
5fb45cb3c9 | ||
|
|
0838af8f3d | ||
|
|
255a431bc1 | ||
|
|
dccc807aff | ||
|
|
8b2aa9fb65 | ||
|
|
0d9b59dda1 | ||
|
|
65184ff73b | ||
|
|
b8f4d6ccdd | ||
|
|
c466dea250 | ||
|
|
1cf65cd880 | ||
|
|
7e43e14c7b | ||
|
|
410bbbd9a5 | ||
|
|
06b6739e68 | ||
|
|
045968b46a | ||
|
|
e292e8178c | ||
|
|
60d5ba9790 | ||
|
|
43c4503a77 | ||
|
|
aafe0f0fba | ||
|
|
dc5f119de1 | ||
|
|
a14fe7f195 | ||
|
|
4d6eb8a959 | ||
|
|
2b0bfb9eea | ||
|
|
5f2f5f34e0 | ||
|
|
a52fba9dbc | ||
|
|
ee6a045f17 | ||
|
|
ed2cb62f39 | ||
|
|
924581bdc8 | ||
|
|
19db693d00 | ||
|
|
48c3805a96 | ||
|
|
07b07e19b3 | ||
|
|
bdbbf3db68 | ||
|
|
f2a5ca26a6 |
@@ -335,19 +335,6 @@ if(NOT APPLE
|
||||
endif()
|
||||
endif()
|
||||
|
||||
check_function_exists(pow NOT_NEED_LIBM)
|
||||
if(NOT_NEED_LIBM)
|
||||
set(LIB_MATH "")
|
||||
else()
|
||||
set(CMAKE_REQUIRED_LIBRARIES m)
|
||||
check_function_exists(pow HAVE_LIBM)
|
||||
if(HAVE_LIBM)
|
||||
set(LIB_MATH m)
|
||||
else()
|
||||
message(FATAL_ERROR "No libm found for math support")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(SUBPROJECT)
|
||||
if(NOT DEFINED BUILD_SHARED_LIBS)
|
||||
option(BUILD_SHARED_LIBS "Build shared libraries (DLLs)" OFF)
|
||||
@@ -705,6 +692,8 @@ mark_as_advanced(MDBX_ENABLE_PROFGC)
|
||||
add_option(MDBX ENABLE_DBI_SPARSE
|
||||
"Support for sparse sets of DBI handles to reduce overhead when starting and processing transactions" ON)
|
||||
add_option(MDBX ENABLE_DBI_LOCKFREE "Support for deferred releasing and a lockfree path to quickly open DBI handles" ON)
|
||||
add_option(MDBX USE_FALLOCATE "Using posix_fallocate() or fcntl(F_PREALLOCATE) on OSX" AUTO)
|
||||
mark_as_advanced(MDBX_USE_FALLOCATE)
|
||||
|
||||
if(NOT MDBX_AMALGAMATED_SOURCE)
|
||||
if(CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE_UPPERCASE STREQUAL "DEBUG")
|
||||
@@ -1034,10 +1023,6 @@ if(MDBX_BUILD_TOOLS)
|
||||
target_setup_options(mdbx_${TOOL})
|
||||
target_link_libraries(mdbx_${TOOL} ${TOOL_MDBX_LIB})
|
||||
endforeach()
|
||||
if(LIB_MATH)
|
||||
target_link_libraries(mdbx_chk ${LIB_MATH})
|
||||
target_link_libraries(mdbx_stat ${LIB_MATH})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# ######################################################################################################################
|
||||
|
||||
110
ChangeLog.md
110
ChangeLog.md
@@ -9,7 +9,7 @@ Please use the `stable` branch or the latest release for production environment
|
||||
Donations are welcome to ETH `0xD104d8f8B2dC312aaD74899F83EBf3EEBDC1EA3A`.
|
||||
Всё будет хорошо!
|
||||
|
||||
## v0.14.2 в активной разработке без конкретизации даты выпуска
|
||||
## v0.14.2 в разработке без конкретизации даты выпуска
|
||||
|
||||
Продолжение развития нового куста/линейки версий с добавлением функционала, расширением API и внутренними переработками.
|
||||
|
||||
@@ -17,6 +17,7 @@ Donations are welcome to ETH `0xD104d8f8B2dC312aaD74899F83EBf3EEBDC1EA3A`.
|
||||
|
||||
- [Erigon](https://erigon.tech/) за спонсорство.
|
||||
- [Артёму Воротникову](https://github.com/vorot93) за сообщение об ошибках и тестирование [привязок для Rust](https://github.com/vorot93/libmdbx-rs).
|
||||
- [Stefan de Konink](https://github.com/skinkie) for fixing [Python bindings](https://github.com/wtdcode/mdbx-py) and documentation improvement.
|
||||
|
||||
Новое:
|
||||
|
||||
@@ -35,6 +36,15 @@ Donations are welcome to ETH `0xD104d8f8B2dC312aaD74899F83EBf3EEBDC1EA3A`.
|
||||
- В API копирования БД добавлена опция `MDBX_CP_OVERWRITE` (перезапись целевого файла),
|
||||
а в утилиту `mdbx_copy` аналогичная по смыслу опция командной строки `-f` .
|
||||
|
||||
- Поддержка Harmony OS (OHOS).
|
||||
|
||||
- Операции с плавающей точкой больше не используются как внутри библиотеки, так и в утилитах, а из сценариев сборки удалено связывание c `libm`.
|
||||
|
||||
- Обеспечена возможность установки отладочных опций `MDBX_DBG_ASSERT`, `MDBX_DBG_AUDIT` и других, через переменные среды окружения.
|
||||
Но соответствующие отладочные возможности по-прежнему должны быть активированы во время сборки.
|
||||
|
||||
- Расширен и переработан состав информации формируемой функцией `mdbx_chk_env()` и выводимой утилитой `mdbx_chk`.
|
||||
|
||||
Исправления:
|
||||
|
||||
- Устранена критическая ошибка в функционале `mdbx_env_resurrect_after_fork()` при использовании SysV-семафоров.
|
||||
@@ -63,20 +73,41 @@ Donations are welcome to ETH `0xD104d8f8B2dC312aaD74899F83EBf3EEBDC1EA3A`.
|
||||
- Устранено получение неожиданного `SIGBUS` из-за отложенного/ленивого выделения места в заполненной файловой системе после приращения файла БД.
|
||||
Более подробное пояснение в комментарии коммита [`2a7f460345edbeb26a51782cbe6af3c55254ae77`](https://gitflic.ru/project/erthink/libmdbx/commit/2a7f460345edbeb26a51782cbe6af3c55254ae77).
|
||||
|
||||
- Исправлена assert-проверка в пути сканирования битовой карты DBI-дескрипторов приводившая к редким падениям 32-битных отладочных сборок.
|
||||
|
||||
- Переделан поиск утилит `lib.exe` и `dlltool.exe` при сборке посредством CMake на Windows.
|
||||
|
||||
- Устранено падение при выполнении Thread-Local-Storage конструкторов при выгрузке библиотеки и наличия экземпляров env, инициализация которых не была завершена.
|
||||
|
||||
- В C++ API добавлена упущенная специализация шаблона `std::hash<mdbx::buffer<...>>`.
|
||||
|
||||
Изменение поведения:
|
||||
|
||||
- Вновь включена/разрешена на старых ядрах Linux, начиная с версии 3.16, так как
|
||||
сейчас уже нет причин отказываться от работы на 3.16 поддерживая при этом ядра 4.x,
|
||||
и еще есть проекты (Isar, Isar-Community, Hive) которым требуется такая поддержка.
|
||||
|
||||
- Изменено значение по-умолчанию порога слияния страниц с 25% до 33%.
|
||||
|
||||
- Ошибка `MDBX_WANNA_RECOVERY` при открытии БД в режиме только-чтение теперь возвращается если размер БД не кратен размеру системной страницы,
|
||||
но игнорируется не кратность размеру блока выделения виртуальной памяти. Этим устраняется регресс, проявившийся вследствие изменения поведения
|
||||
после задействования системного вызова `fallocate()` для предотвращения `SIGBUS` после приращения файла БД в заполненной файловой системе.
|
||||
|
||||
- Для уменьшения вероятности неожиданных ошибок, вследствие переходных процессов и отложенной обработки в ядре ОС при конкурентном закрытии и
|
||||
открытии БД разными процессами, втрое увеличено количество повторных попыток захвата блокировок. Предположительно это также решит проблему
|
||||
неожиданных ошибок `EAGAIN` (11) на Android при рестарте приложений и открытия БД сразу после закрытия.
|
||||
|
||||
- По-умолчанию сборка для Windows теперь выполняется с использованием SDK уровня Windows 10, а не Windows 7.
|
||||
|
||||
- Изменён размер и состав структуры `MDBX_envinfo`, а функция `mdbx_env_info_ex()` больше не поддерживает старые варианты.
|
||||
Этим нарушена совместимость ABI со старыми версиями библиотеки, но сохранена совместимость API на уровне исходного кода.
|
||||
|
||||
Прочие доработки:
|
||||
|
||||
- Доработана логика отказа от использования OFD-блокировок на POSIX-платформах.
|
||||
Теперь кроме `EINVAL` учитываются дополнительные коды ошибок (`ENOSYS`, `ENOIMPL`, `ENOTSUP`, `ENOSUPP`, `EOPNOTSUPP`),
|
||||
что позволит работать собранной библиотеке в некоторых случаях, когда актуальное ядро/контейнер/эмулятор не поддерживает требуемых системных вызовов.
|
||||
|
||||
- Изменено значение по-умолчанию порога слияния страниц с 25% до 33%.
|
||||
|
||||
- Тесты дополнены сценариями для проверки добавленных возможностей, выявленных регрессов и ошибок.
|
||||
|
||||
- В тестовый фреймворк добавлена поддержка опции --numa # для привязки стохастического теста к NUMA-узлу,
|
||||
@@ -87,6 +118,16 @@ Donations are welcome to ETH `0xD104d8f8B2dC312aaD74899F83EBf3EEBDC1EA3A`.
|
||||
|
||||
- В функционал проверки целостности БД и утилиту mdbx_chk добавлен вывод гистограммы заполнения страниц образующих структуру дерева и участвующих в операциях разделения/слияния/перебалансировки.
|
||||
|
||||
- Для Android добавлен обход (workaround) для уменьшения вероятности системной ошибки `EAGAIN` возникающей
|
||||
из-за нехватки системных ресурсов и переходных процессов при закрытии и быстром повторном открытии БД.
|
||||
|
||||
- Для Linux добавлено предотвращение проявления ошибки в реализации fast_commit файловой системы Ext4.
|
||||
|
||||
- В отладочные сборки на Windows при срабатывании assert-проверок добавлена поддержка вариантов "Пропустить" и "Повторить".
|
||||
|
||||
- В используемых на платформе Windows файловых блокировках задействованы ожидания с таймаутами,
|
||||
что теоретически должно снизить вероятность возникновения ошибок `ERROR_LOCK_VIOLATION` (`33`) при открытии БД в конкурентных сценариях.
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
@@ -372,6 +413,69 @@ Donations are welcome to ETH `0xD104d8f8B2dC312aaD74899F83EBf3EEBDC1EA3A`.
|
||||
********************************************************************************
|
||||
|
||||
|
||||
## v0.13.9 "ИС-2" (IS-2) от 2025-10-31
|
||||
|
||||
Поддерживающий выпуск стабильной ветки с исправлением обнаруженных ошибок и устранением недочётов.
|
||||
Выпуск назван в память о cамом мощном тяжелом советском танке ["ИС-2"](https://ru.ruwiki.ru/wiki/ИС-2), который был принят на вооружение
|
||||
31 октября 1943 года в разгар Великой Отечественной Войны и долгое время оставался одной из сильнейших машин мира в категории по массе 40—50 тонн.
|
||||
|
||||
Благодарности:
|
||||
|
||||
- [Erigon](https://erigon.tech/) за спонсорство.
|
||||
|
||||
Исправления:
|
||||
|
||||
- Исправлена assert-проверка в пути сканирования битовой карты DBI-дескрипторов приводившая к редким падениям 32-битных отладочных сборок.
|
||||
|
||||
- Переделан поиск утилит `lib.exe` и `dlltool.exe` при сборке посредством CMake на Windows.
|
||||
|
||||
- Устранён регресс проявлявшийся увеличением (не-уменьшением) размера БД, после добавления использования `fallocate()`
|
||||
ради предотвращения SIGBUS при нехватке места в файловой системе где расположена БД.
|
||||
|
||||
- Устранена опечатка в тестовом скрипте `test/battery-tmux.sh` приводящая к созданию мусорного файла с именем `-`.
|
||||
|
||||
- Удалено лишнее/ненужное использование макроса `MDBX_INTERNAL` оставшееся после рефакторинга.
|
||||
|
||||
- Для Android добавлен обход (workaround) для уменьшения вероятности системной ошибки `EAGAIN` возникающей
|
||||
из-за нехватки системных ресурсов и переходных процессов при закрытии и быстром повторном открытии БД.
|
||||
|
||||
Прочие доработки:
|
||||
|
||||
- Поддержка Harmony OS (OHOS).
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
|
||||
## v0.13.8 "Всеобуч" (v`seabooch) от 2025-08-31
|
||||
|
||||
Поддерживающий выпуск стабильной ветки с исправлением обнаруженных ошибок и устранением недочётов,
|
||||
в день 100 летнего юбилея Постановления Всероссийского центрального исполнительного комитета о всеобщем бесплатном начальном образовании.
|
||||
|
||||
Благодарности:
|
||||
|
||||
- [Erigon](https://erigon.tech/) за спонсорство.
|
||||
|
||||
Исправления:
|
||||
|
||||
- Устранена возможность получения неожиданного `SIGBUS` из-за отложенного/ленивого выделения места в заполненной файловой системе после приращения файла БД.
|
||||
Более подробное пояснение в комментарии коммита [`2930b304dc674bbccd188b7ce7c3f83755ef706e`](https://gitflic.ru/project/erthink/libmdbx/commit/2930b304dc674bbccd188b7ce7c3f83755ef706e).
|
||||
|
||||
Изменение поведения:
|
||||
|
||||
- Вновь включена/разрешена на старых ядрах Linux, начиная с версии 3.16, так как
|
||||
сейчас уже нет причин отказываться от работы на 3.16 поддерживая при этом ядра 4.x,
|
||||
и еще есть проекты (Isar, Isar-Community, Hive) которым требуется такая поддержка.
|
||||
|
||||
- Ошибка `MDBX_WANNA_RECOVERY` при открытии БД в режиме только-чтение теперь возвращается если размер БД не кратен размеру системной страницы,
|
||||
но игнорируется не кратность размеру блока выделения виртуальной памяти.
|
||||
Этим устраняется регресс, проявившийся вследствие изменения поведения после задействования
|
||||
системного вызова `fallocate()` для предотвращения `SIGBUS` после приращения файла БД в заполненной файловой системе.
|
||||
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
|
||||
## v0.13.7 "Дружба" (Friendship) от 2025-07-30.
|
||||
|
||||
Поддерживающий выпуск стабильной ветки с исправлением обнаруженных ошибок и устранением недочётов,
|
||||
|
||||
13
GNUmakefile
13
GNUmakefile
@@ -106,11 +106,12 @@ endef
|
||||
define uname2ldflags
|
||||
case "$(UNAME)" in
|
||||
CYGWIN*|MINGW*|MSYS*|Windows*)
|
||||
echo '-Wl,--gc-sections,-O1';
|
||||
echo '-Wl,--gc-sections,-O1,--as-needed';
|
||||
;;
|
||||
*)
|
||||
$(LD) --help 2>/dev/null | grep -q -- --gc-sections && echo '-Wl,--gc-sections,-z,relro,-O1';
|
||||
$(LD) --help 2>/dev/null | grep -q -- -dead_strip && echo '-Wl,-dead_strip';
|
||||
$(LD) --help 2>/dev/null | grep -q -- --as-needed && echo '-Wl,--as-needed';
|
||||
;;
|
||||
esac
|
||||
endef
|
||||
@@ -119,16 +120,16 @@ endef
|
||||
define uname2libs
|
||||
case "$(UNAME)" in
|
||||
CYGWIN*|MINGW*|MSYS*|Windows*)
|
||||
echo '-lm -lntdll -lwinmm';
|
||||
echo '-lntdll -lwinmm';
|
||||
;;
|
||||
*SunOS*|*Solaris*)
|
||||
echo '-lm -lkstat -lrt';
|
||||
echo '-lkstat -lrt';
|
||||
;;
|
||||
*Darwin*|OpenBSD*)
|
||||
echo '-lm';
|
||||
echo '';
|
||||
;;
|
||||
*)
|
||||
echo '-lm -lrt';
|
||||
echo '-lrt';
|
||||
;;
|
||||
esac
|
||||
endef
|
||||
@@ -561,7 +562,7 @@ $(foreach file,$(TOOLS),$(eval $(call tool-rule,$(file))))
|
||||
|
||||
mdbx_test: $(TEST_OBJ) libmdbx.$(SO_SUFFIX)
|
||||
@echo ' LD $@'
|
||||
$(QUIET)$(CXX) $(CXXFLAGS) $(TEST_OBJ) -Wl,-rpath . -L . -l mdbx $(EXE_LDFLAGS) $(LIBS) -o $@
|
||||
$(QUIET)$(CXX) $(CXXFLAGS) $(TEST_OBJ) -Wl,-rpath . -L . -l mdbx $(EXE_LDFLAGS) $(LIBS) -lm -o $@
|
||||
|
||||
$(MDBX_GIT_DIR)/HEAD $(MDBX_GIT_DIR)/index $(MDBX_GIT_DIR)/refs/tags:
|
||||
@echo '*** ' >&2
|
||||
|
||||
9
TODO.md
9
TODO.md
@@ -1,6 +1,9 @@
|
||||
|
||||
TODO
|
||||
----
|
||||
|
||||
- add optional page-get and operation statistics for cursors.
|
||||
- split ASSERT() to CHECK{0,1,2,3} and basal `assert()`.
|
||||
- [SWIG](https://www.swig.org/).
|
||||
- Параллельная lto-сборка с устранением предупреждений.
|
||||
- Интеграция c DTrace и аналогами.
|
||||
@@ -16,9 +19,15 @@ TODO
|
||||
- [Support MessagePack for Keys & Values](https://libmdbx.dqdkfa.ru/dead-github/issues/115).
|
||||
- Packages for [Astra Linux](https://astralinux.ru/), [ALT Linux](https://www.altlinux.org/), [ROSA Linux](https://www.rosalinux.ru/), etc.
|
||||
|
||||
In development
|
||||
--------------
|
||||
- get-cached API.
|
||||
- digging/refactoring/optimizing page splitting and tree rebalance.
|
||||
|
||||
Done
|
||||
----
|
||||
|
||||
- HarmonyOS support.
|
||||
- Ранняя/не-отложенная очистка GC.
|
||||
- Рефакторинг gc-get/gc-put c переходом на "интервальные" списки.
|
||||
- [Engage new terminology](https://libmdbx.dqdkfa.ru/dead-github/issues/137).
|
||||
|
||||
@@ -82,6 +82,7 @@ class libmdbx(ConanFile):
|
||||
'mdbx.use_mincore': ['Auto', True, False],
|
||||
'mdbx.use_ofdlocks': ['Auto', True, False],
|
||||
'mdbx.use_sendfile': ['Auto', True, False],
|
||||
'mdbx.use_fallocate': ['Auto', True, False],
|
||||
'mdbx.without_msvc_crt': ['Default', True, False],
|
||||
'shared': [True, False],
|
||||
}
|
||||
@@ -113,6 +114,7 @@ class libmdbx(ConanFile):
|
||||
'mdbx.use_mincore': 'Auto',
|
||||
'mdbx.use_ofdlocks': 'Auto',
|
||||
'mdbx.use_sendfile': 'Auto',
|
||||
'mdbx.use_fallocate': 'Auto',
|
||||
'mdbx.without_msvc_crt': 'Default',
|
||||
'shared': True,
|
||||
}
|
||||
@@ -143,7 +145,8 @@ class libmdbx(ConanFile):
|
||||
'mdbx.use_copyfilerange': 'Advanced: Use `copy_file_range()` syscall. ',
|
||||
'mdbx.use_mincore': "Use Unix' `mincore()` to determine whether database pages are resident in memory. ",
|
||||
'mdbx.use_ofdlocks': 'Advanced: Use POSIX OFD-locks. ',
|
||||
'mdbx.use_sendfile': 'Advancedc: Use `sendfile()` syscall. ',
|
||||
'mdbx.use_sendfile': 'Advanced: Use `sendfile()` syscall. ',
|
||||
'mdbx.use_fallocate': 'Advanced: Use posix_fallocate() or fcntl(F_PREALLOCATE) on OSX. ',
|
||||
'mdbx.without_msvc_crt': 'Avoid dependence from MSVC CRT and use ntdll.dll instead. ',
|
||||
}
|
||||
|
||||
@@ -160,6 +163,7 @@ class libmdbx(ConanFile):
|
||||
self.options.rm_safe('mdbx.mmap_incoherent_file_write')
|
||||
self.options.rm_safe('mdbx.use_mincore')
|
||||
self.options.rm_safe('mdbx.use_ofdlocks')
|
||||
self.options.rm_safe('mdbx.use_fallocate')
|
||||
else:
|
||||
self.options.rm_safe('mdbx.without_msvc_crt')
|
||||
if is_apple_os(self):
|
||||
|
||||
63
mdbx.h
63
mdbx.h
@@ -837,7 +837,9 @@ enum MDBX_constants {
|
||||
|
||||
/** Log level
|
||||
* \note Levels more detailed than (greater than) \ref MDBX_LOG_NOTICE
|
||||
* requires build libmdbx with \ref MDBX_DEBUG option. */
|
||||
* requires build libmdbx with \ref MDBX_DEBUG option.
|
||||
*
|
||||
* \see mdbx_setup_debug() \see MDBX_log_level_t */
|
||||
typedef enum MDBX_log_level {
|
||||
/** Critical conditions, i.e. assertion failures.
|
||||
* \note libmdbx always produces such messages regardless
|
||||
@@ -894,24 +896,26 @@ typedef enum MDBX_log_level {
|
||||
*
|
||||
* \details `MDBX_DBG_DUMP` and `MDBX_DBG_LEGACY_MULTIOPEN` always have an
|
||||
* effect, but `MDBX_DBG_ASSERT`, `MDBX_DBG_AUDIT` and `MDBX_DBG_JITTER` only if
|
||||
* libmdbx built with \ref MDBX_DEBUG. */
|
||||
* libmdbx built with \ref MDBX_DEBUG.
|
||||
*
|
||||
* \see mdbx_setup_debug() \see MDBX_debug_flags_t */
|
||||
typedef enum MDBX_debug_flags {
|
||||
MDBX_DBG_NONE = 0,
|
||||
|
||||
/** Enable assertion checks.
|
||||
/** Enables assertion checks.
|
||||
* \note Always enabled for builds with `MDBX_FORCE_ASSERTIONS` option,
|
||||
* otherwise requires build with \ref MDBX_DEBUG > 0 */
|
||||
MDBX_DBG_ASSERT = 1,
|
||||
|
||||
/** Enable pages usage audit at commit transactions.
|
||||
/** Enables pages usage audit at commit transactions.
|
||||
* \note Requires build with \ref MDBX_DEBUG > 0 */
|
||||
MDBX_DBG_AUDIT = 2,
|
||||
|
||||
/** Enable small random delays in critical points.
|
||||
/** Enables small random delays in critical points.
|
||||
* \note Requires build with \ref MDBX_DEBUG > 0 */
|
||||
MDBX_DBG_JITTER = 4,
|
||||
|
||||
/** Include or not meta-pages in coredump files.
|
||||
/** Controls including of a database(s) meta-pages in coredump files.
|
||||
* \note May affect performance in \ref MDBX_WRITEMAP mode */
|
||||
MDBX_DBG_DUMP = 8,
|
||||
|
||||
@@ -921,9 +925,8 @@ typedef enum MDBX_debug_flags {
|
||||
/** Allow read and write transactions overlapping for the same thread. */
|
||||
MDBX_DBG_LEGACY_OVERLAP = 32,
|
||||
|
||||
/** Don't auto-upgrade format signature.
|
||||
* \note However a new write transactions will use and store
|
||||
* the last signature regardless this flag */
|
||||
/** Disables automatic updating of the database format signature, i.e. upgrade database format on a media.
|
||||
* \note Nonetheless a new write transactions will use and store the last signature regardless this flag */
|
||||
MDBX_DBG_DONT_UPGRADE = 64,
|
||||
|
||||
#ifdef ENABLE_UBSAN
|
||||
@@ -958,7 +961,9 @@ typedef void MDBX_debug_func(MDBX_log_level_t loglevel, const char *function, in
|
||||
|
||||
/** \brief Setup global log-level, debug options and debug logger.
|
||||
* \returns The previous `debug_flags` in the 0-15 bits
|
||||
* and `log_level` in the 16-31 bits. */
|
||||
* and `log_level` in the 16-31 bits.
|
||||
*
|
||||
* \see MDBX_log_level_t \see MDBX_debug_flags_t */
|
||||
LIBMDBX_API int mdbx_setup_debug(MDBX_log_level_t log_level, MDBX_debug_flags_t debug_flags, MDBX_debug_func *logger);
|
||||
|
||||
typedef void MDBX_debug_func_nofmt(MDBX_log_level_t loglevel, const char *function, int line, const char *msg,
|
||||
@@ -1007,7 +1012,10 @@ MDBX_NORETURN LIBMDBX_API void mdbx_panic(const char *fmt, ...) MDBX_PRINTF_ARGS
|
||||
|
||||
/** \brief Panics with assertion failed message and causes abnormal process
|
||||
* termination. */
|
||||
MDBX_NORETURN LIBMDBX_API void mdbx_assert_fail(const MDBX_env *env, const char *msg, const char *func, unsigned line);
|
||||
#if !((defined(_WIN32) || defined(_WIN64)) && !MDBX_WITHOUT_MSVC_CRT)
|
||||
MDBX_NORETURN
|
||||
#endif /* MDBX_WITHOUT_MSVC_CRT */
|
||||
LIBMDBX_API void mdbx_assert_fail(const MDBX_env *env, const char *msg, const char *func, unsigned line);
|
||||
/** end of c_debug @} */
|
||||
|
||||
/** \brief Environment flags
|
||||
@@ -1904,8 +1912,7 @@ typedef enum MDBX_error {
|
||||
* - The table was dropped and recreated with different flags. */
|
||||
MDBX_INCOMPATIBLE = -30784,
|
||||
|
||||
/** Invalid reuse of reader locktable slot,
|
||||
* e.g. read-transaction already run for current thread */
|
||||
/** Reader locktable slot was unexpectedly reused or cleared by an enemy thread */
|
||||
MDBX_BAD_RSLOT = -30783,
|
||||
|
||||
/** Transaction is not valid for requested operation,
|
||||
@@ -2810,17 +2817,20 @@ struct MDBX_envinfo {
|
||||
uint64_t shrink; /**< Shrink threshold for datafile */
|
||||
uint64_t grow; /**< Growth step for datafile */
|
||||
} mi_geo;
|
||||
uint64_t mi_mapsize; /**< Size of the data memory map */
|
||||
uint64_t mi_mapsize; /**< Size of the database memory map */
|
||||
uint64_t mi_dxb_fsize; /**< Current database file size */
|
||||
uint64_t mi_dxb_fallocated; /**< Space allocated for the database file in a filesystem */
|
||||
uint64_t mi_last_pgno; /**< Number of the last used page */
|
||||
uint64_t mi_recent_txnid; /**< ID of the last committed transaction */
|
||||
uint64_t mi_latter_reader_txnid; /**< ID of the last reader transaction */
|
||||
uint64_t mi_self_latter_reader_txnid; /**< ID of the last reader transaction
|
||||
of caller process */
|
||||
uint64_t mi_self_latter_reader_txnid; /**< ID of the last reader transaction of this/current process */
|
||||
uint64_t mi_meta_txnid[3], mi_meta_sign[3];
|
||||
uint32_t mi_maxreaders; /**< Total reader slots in the environment */
|
||||
uint32_t mi_numreaders; /**< Max reader slots used in the environment */
|
||||
uint32_t mi_dxb_pagesize; /**< Database pagesize */
|
||||
uint32_t mi_sys_pagesize; /**< System pagesize */
|
||||
uint32_t mi_sys_upcblk; /**< System "Unified Page Cache" block size */
|
||||
uint32_t mi_sys_ioblk; /**< Filesystem I/O block size */
|
||||
|
||||
/** \brief A mostly unique ID that is regenerated on each boot.
|
||||
|
||||
@@ -4562,6 +4572,10 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, const MDBX_val *b) MDBX_CXX17_NOEX
|
||||
* \param [out] dbi Address where the new \ref MDBX_dbi handle
|
||||
* will be stored.
|
||||
*
|
||||
* The name in \ref mdbx_dbi_open() is a null terminated string. While
|
||||
* \ref mdbx_dbi_open2() supports arbitrary length keys which are not
|
||||
* truncated, for example to support a fixed width integer type.
|
||||
*
|
||||
* For \ref mdbx_dbi_open_ex() additional arguments allow you to set custom
|
||||
* comparison functions for keys and values (for multimaps).
|
||||
* \see avoid_custom_comparators
|
||||
@@ -4594,6 +4608,8 @@ LIBMDBX_API int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flag
|
||||
* \param [in] name The name of the table to open. If only a single
|
||||
* table is needed in the environment,
|
||||
* this value may be NULL.
|
||||
* The name in \ref mdbx_dbi_open_ex() is null terminated,
|
||||
* while \ref mdbx_dbi_open_ex2() supports an arbitrary length.
|
||||
* \param [in] flags Special options for this table.
|
||||
* \param [in] keycmp Optional custom key comparison function for a table.
|
||||
* \param [in] datacmp Optional custom data comparison function for a table.
|
||||
@@ -6534,17 +6550,17 @@ typedef struct MDBX_chk_table {
|
||||
|
||||
size_t payload_bytes, lost_bytes;
|
||||
struct {
|
||||
size_t all, empty, other;
|
||||
size_t all, empty, broken;
|
||||
size_t branch, leaf;
|
||||
size_t nested_branch, nested_leaf, nested_subleaf;
|
||||
} pages;
|
||||
struct {
|
||||
/// Tree deep histogram
|
||||
struct MDBX_chk_histogram deep;
|
||||
struct MDBX_chk_histogram height;
|
||||
/// Histogram of large/overflow pages length
|
||||
struct MDBX_chk_histogram large_pages;
|
||||
/// Histogram of nested trees height, span length for GC
|
||||
struct MDBX_chk_histogram nested_tree;
|
||||
struct MDBX_chk_histogram nested_height;
|
||||
/// Keys length histogram
|
||||
struct MDBX_chk_histogram key_len;
|
||||
/// Values length histogram
|
||||
@@ -6552,9 +6568,9 @@ typedef struct MDBX_chk_table {
|
||||
/// Number of multi-values (aka duplicates) histogram
|
||||
struct MDBX_chk_histogram multival;
|
||||
/// Histogram of branch and leaf pages filling in percents
|
||||
struct MDBX_chk_histogram tree_filling;
|
||||
struct MDBX_chk_histogram tree_density;
|
||||
/// Histogram of nested tree(s) branch and leaf pages filling in percents
|
||||
struct MDBX_chk_histogram nested_tree_filling;
|
||||
struct MDBX_chk_histogram large_or_nested_density;
|
||||
} histogram;
|
||||
} MDBX_chk_table_t;
|
||||
|
||||
@@ -6661,6 +6677,11 @@ LIBMDBX_API int mdbx_env_chk(MDBX_env *env, const MDBX_chk_callbacks_t *cb, MDBX
|
||||
* \returns Нулевое значение в случае успеха, иначе код ошибки. */
|
||||
LIBMDBX_API int mdbx_env_chk_encount_problem(MDBX_chk_context_t *ctx);
|
||||
|
||||
LIBMDBX_API const char *mdbx_ratio2digits(uint64_t numerator, uint64_t denominator, int precision, char *buffer,
|
||||
size_t buffer_size);
|
||||
|
||||
LIBMDBX_API const char *mdbx_ratio2percents(uint64_t value, uint64_t whole, char *buffer, size_t buffer_size);
|
||||
|
||||
/** end of chk @} */
|
||||
|
||||
/** end of c_api @} */
|
||||
|
||||
6
mdbx.h++
6
mdbx.h++
@@ -6415,6 +6415,12 @@ template <> struct hash<::mdbx::slice> {
|
||||
MDBX_CXX14_CONSTEXPR size_t operator()(::mdbx::slice const &slice) const noexcept { return slice.hash_value(); }
|
||||
};
|
||||
|
||||
template <class ALLOCATOR, typename CAPACITY_POLICY> struct hash<::mdbx::buffer<ALLOCATOR, CAPACITY_POLICY>> {
|
||||
MDBX_CXX14_CONSTEXPR size_t operator()(::mdbx::buffer<ALLOCATOR, CAPACITY_POLICY> const &buffer) const noexcept {
|
||||
return buffer.hash_value();
|
||||
}
|
||||
};
|
||||
|
||||
/// end cxx_api @}
|
||||
} // namespace std
|
||||
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
From f2f1f6e76c1538d044b552d9e7ecedc3433e6cd9 Mon Sep 17 00:00:00 2001
|
||||
From b2f1297dd2cd42cc0e04f1900fbf6da6c2694b7b Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?=
|
||||
=?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= <leo@yuriev.ru>
|
||||
Date: Sun, 3 Aug 2025 23:59:11 +0300
|
||||
Date: Fri, 31 Oct 2025 16:58:31 +0300
|
||||
Subject: [PATCH] package/libmdbx: new package (library/database).
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
This patch adds libmdbx:
|
||||
This patch adds libmdbx 0.13.9:
|
||||
- libmdbx is one of the fastest compact embeddable key-value ACID database.
|
||||
- libmdbx has a specific set of properties and capabilities,
|
||||
focused on creating unique lightweight solutions.
|
||||
@@ -15,158 +15,55 @@ This patch adds libmdbx:
|
||||
in terms of reliability, features and performance.
|
||||
- more information at https://libmdbx.dqdkfa.ru
|
||||
|
||||
The 0.13.7 "Дружба" (Friendship) is stable release of _libmdbx_ branch with new superior features.
|
||||
|
||||
The complete ChangeLog: https://gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md
|
||||
The 0.13.9 "ИС-2" (IS-2) is bugfix release of the stable branch.
|
||||
For more information please see [ChangeLog](https://github.com/erthink/libmdbx/blob/stable/ChangeLog.md).
|
||||
|
||||
Signed-off-by: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
|
||||
---
|
||||
DEVELOPERS | 3 +++
|
||||
package/Config.in | 1 +
|
||||
package/libmdbx/Config.in | 45 ++++++++++++++++++++++++++++++++++++
|
||||
package/libmdbx/libmdbx.hash | 6 +++++
|
||||
package/libmdbx/libmdbx.mk | 41 ++++++++++++++++++++++++++++++++
|
||||
5 files changed, 96 insertions(+)
|
||||
create mode 100644 package/libmdbx/Config.in
|
||||
create mode 100644 package/libmdbx/libmdbx.hash
|
||||
create mode 100644 package/libmdbx/libmdbx.mk
|
||||
package/libmdbx/Config.in | 4 +++-
|
||||
package/libmdbx/libmdbx.hash | 2 +-
|
||||
package/libmdbx/libmdbx.mk | 2 +-
|
||||
3 files changed, 5 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/DEVELOPERS b/DEVELOPERS
|
||||
index 9ab1e125f4..758ff6a2d5 100644
|
||||
--- a/DEVELOPERS
|
||||
+++ b/DEVELOPERS
|
||||
@@ -1482,6 +1482,9 @@ N: Leon Anavi <leon.anavi@konsulko.com>
|
||||
F: board/olimex/a10_olinuxino
|
||||
F: configs/olimex_a10_olinuxino_lime_defconfig
|
||||
|
||||
+N: Leonid Yuriev <leo@yuriev.ru>
|
||||
+F: package/libmdbx/
|
||||
+
|
||||
N: Lionel Flandrin <lionel@svkt.org>
|
||||
F: package/python-babel/
|
||||
F: package/python-daemonize/
|
||||
diff --git a/package/Config.in b/package/Config.in
|
||||
index 016a99ed1a..a6f95bfaa9 100644
|
||||
--- a/package/Config.in
|
||||
+++ b/package/Config.in
|
||||
@@ -1372,6 +1372,7 @@ menu "Database"
|
||||
source "package/kompexsqlite/Config.in"
|
||||
source "package/leveldb/Config.in"
|
||||
source "package/libgit2/Config.in"
|
||||
+ source "package/libmdbx/Config.in"
|
||||
source "package/libodb/Config.in"
|
||||
source "package/libodb-boost/Config.in"
|
||||
source "package/libodb-mysql/Config.in"
|
||||
diff --git a/package/libmdbx/Config.in b/package/libmdbx/Config.in
|
||||
new file mode 100644
|
||||
index 0000000000..a9a4ac45c5
|
||||
--- /dev/null
|
||||
index a9a4ac45c5..1640dbd9de 100644
|
||||
--- a/package/libmdbx/Config.in
|
||||
+++ b/package/libmdbx/Config.in
|
||||
@@ -0,0 +1,45 @@
|
||||
+config BR2_PACKAGE_LIBMDBX
|
||||
+ bool "libmdbx"
|
||||
+ depends on BR2_USE_MMU
|
||||
+ depends on BR2_TOOLCHAIN_HAS_SYNC_4
|
||||
+ depends on BR2_TOOLCHAIN_HAS_THREADS
|
||||
+ depends on BR2_TOOLCHAIN_GCC_AT_LEAST_4_4
|
||||
+ help
|
||||
+ One of the fastest compact key-value ACID database
|
||||
+ without WAL. libmdbx has a specific set of properties
|
||||
+ and capabilities, focused on creating unique lightweight
|
||||
+ solutions.
|
||||
+
|
||||
+ libmdbx surpasses the legendary LMDB in terms of
|
||||
+ reliability, features and performance.
|
||||
+
|
||||
+ https://libmdbx.dqdkfa.ru
|
||||
+
|
||||
+if BR2_PACKAGE_LIBMDBX
|
||||
+
|
||||
+config BR2_PACKAGE_LIBMDBX_TOOLS
|
||||
+ bool "install tools"
|
||||
+ help
|
||||
+ Install libmdbx tools for checking, dump, restore
|
||||
+ and show statistics of databases.
|
||||
+
|
||||
+config BR2_PACKAGE_LIBMDBX_CXX
|
||||
+ bool "C++ API"
|
||||
+ depends on BR2_INSTALL_LIBSTDCPP
|
||||
+ depends on BR2_TOOLCHAIN_GCC_AT_LEAST_4_8
|
||||
+ depends on !BR2_TOOLCHAIN_HAS_GCC_BUG_64735
|
||||
+ help
|
||||
+ Enable modern C++11/14/17/20 API for libmdbx.
|
||||
+
|
||||
+comment "libmdbx C++ support needs a toolchain w/ C++11, gcc >= 4.8 w/o bug#64735"
|
||||
+ depends on !BR2_INSTALL_LIBSTDCPP || \
|
||||
+ !BR2_TOOLCHAIN_GCC_AT_LEAST_4_8 || \
|
||||
+ BR2_TOOLCHAIN_HAS_GCC_BUG_64735
|
||||
+
|
||||
+endif
|
||||
+
|
||||
+comment "libmdbx needs MMU, a toolchain w/ threads, gcc >= 4.4 w/ 4-byte atomics"
|
||||
+ depends on BR2_USE_MMU
|
||||
+ depends on !BR2_TOOLCHAIN_HAS_THREADS || \
|
||||
+ !BR2_TOOLCHAIN_HAS_SYNC_4 || \
|
||||
+ !BR2_TOOLCHAIN_GCC_AT_LEAST_4_4
|
||||
@@ -11,7 +11,9 @@ config BR2_PACKAGE_LIBMDBX
|
||||
solutions.
|
||||
|
||||
libmdbx surpasses the legendary LMDB in terms of
|
||||
- reliability, features and performance.
|
||||
+ reliability, features and performance. At the end of 2024
|
||||
+ libmdbx was chosen by all modern Ethereum frontiers/nodes
|
||||
+ as a storage engine.
|
||||
|
||||
https://libmdbx.dqdkfa.ru
|
||||
|
||||
diff --git a/package/libmdbx/libmdbx.hash b/package/libmdbx/libmdbx.hash
|
||||
new file mode 100644
|
||||
index 0000000000..8c7efb184b
|
||||
--- /dev/null
|
||||
index ae5266716b..4a4f302015 100644
|
||||
--- a/package/libmdbx/libmdbx.hash
|
||||
+++ b/package/libmdbx/libmdbx.hash
|
||||
@@ -0,0 +1,6 @@
|
||||
+# Hashes from: https://libmdbx.dqdkfa.ru/release/SHA256SUMS
|
||||
+sha256 d00c1287ec6bbc366363ccdd3eea97bd470ccb5cc102d56b341f84a9fba7e8e9 libmdbx-amalgamated-0.13.7.tar.xz
|
||||
+
|
||||
+# Locally calculated
|
||||
+sha256 0d542e0c8804e39aa7f37eb00da5a762149dc682d7829451287e11b938e94594 LICENSE
|
||||
+sha256 651f71b46c6bb0046d2122df7f9def9cb24f4dc28c5b11cef059f66565cda30f NOTICE
|
||||
@@ -1,5 +1,5 @@
|
||||
# Hashes from: https://libmdbx.dqdkfa.ru/release/SHA256SUMS
|
||||
-sha256 57db987de6f7ccc66a66ae28a7bda9f9fbb48ac5fb9279bcca92fd5de13075d1 libmdbx-amalgamated-0.13.6.tar.xz
|
||||
+sha256 63d2608c8f7c23185c0d27d817d42dd720e84973224ffc584c7f7b522f5f06fe libmdbx-amalgamated-0.13.9.tar.xz
|
||||
|
||||
# Locally calculated
|
||||
sha256 0d542e0c8804e39aa7f37eb00da5a762149dc682d7829451287e11b938e94594 LICENSE
|
||||
diff --git a/package/libmdbx/libmdbx.mk b/package/libmdbx/libmdbx.mk
|
||||
new file mode 100644
|
||||
index 0000000000..bbb37f21a6
|
||||
--- /dev/null
|
||||
index f461d98397..62817a98f8 100644
|
||||
--- a/package/libmdbx/libmdbx.mk
|
||||
+++ b/package/libmdbx/libmdbx.mk
|
||||
@@ -0,0 +1,41 @@
|
||||
+################################################################################
|
||||
+#
|
||||
+# libmdbx
|
||||
+#
|
||||
+################################################################################
|
||||
+
|
||||
+LIBMDBX_VERSION = 0.13.7
|
||||
+LIBMDBX_SOURCE = libmdbx-amalgamated-$(LIBMDBX_VERSION).tar.xz
|
||||
+LIBMDBX_SITE = https://libmdbx.dqdkfa.ru/release
|
||||
+LIBMDBX_SUPPORTS_IN_SOURCE_BUILD = NO
|
||||
+LIBMDBX_LICENSE = Apache-2.0
|
||||
+LIBMDBX_LICENSE_FILES = LICENSE NOTICE
|
||||
+LIBMDBX_STRIP_COMPONENTS = 0
|
||||
+LIBMDBX_INSTALL_STAGING = YES
|
||||
+
|
||||
+# Set CMAKE_BUILD_TYPE to Release to remove -Werror and avoid a build failure
|
||||
+# with glibc < 2.12
|
||||
+LIBMDBX_CONF_OPTS = \
|
||||
+ -DCMAKE_BUILD_TYPE=Release \
|
||||
+ -DMDBX_INSTALL_MANPAGES=OFF \
|
||||
+ -DBUILD_FOR_NATIVE_CPU=OFF \
|
||||
+ -DMDBX_BUILD_CXX=$(if $(BR2_PACKAGE_LIBMDBX_CXX),ON,OFF) \
|
||||
+ -DMDBX_BUILD_TOOLS=$(if $(BR2_PACKAGE_LIBMDBX_TOOLS),ON,OFF)
|
||||
+
|
||||
+ifeq ($(BR2_STATIC_LIBS)$(BR2_SHARED_STATIC_LIBS),y)
|
||||
+LIBMDBX_CONF_OPTS += -DMDBX_INSTALL_STATIC=ON
|
||||
+else
|
||||
+LIBMDBX_CONF_OPTS += -DMDBX_INSTALL_STATIC=OFF
|
||||
+endif
|
||||
+
|
||||
+ifeq ($(BR2_SHARED_LIBS)$(BR2_SHARED_STATIC_LIBS),y)
|
||||
+LIBMDBX_CONF_OPTS += \
|
||||
+ -DMDBX_BUILD_SHARED_LIBRARY=ON \
|
||||
+ -DMDBX_LINK_TOOLS_NONSTATIC=ON
|
||||
+else
|
||||
+LIBMDBX_CONF_OPTS += \
|
||||
+ -DMDBX_BUILD_SHARED_LIBRARY=OFF \
|
||||
+ -DMDBX_LINK_TOOLS_NONSTATIC=OFF
|
||||
+endif
|
||||
+
|
||||
+$(eval $(cmake-package))
|
||||
@@ -4,7 +4,7 @@
|
||||
#
|
||||
################################################################################
|
||||
|
||||
-LIBMDBX_VERSION = 0.13.6
|
||||
+LIBMDBX_VERSION = 0.13.9
|
||||
LIBMDBX_SOURCE = libmdbx-amalgamated-$(LIBMDBX_VERSION).tar.xz
|
||||
LIBMDBX_SITE = https://libmdbx.dqdkfa.ru/release
|
||||
LIBMDBX_SUPPORTS_IN_SOURCE_BUILD = NO
|
||||
--
|
||||
2.50.1
|
||||
2.51.2
|
||||
|
||||
|
||||
@@ -479,7 +479,7 @@ __cold static int copy_with_compacting(MDBX_env *env, MDBX_txn *txn, mdbx_fileha
|
||||
if (meta->geometry.now != meta->geometry.first_unallocated) {
|
||||
const size_t whole_size = pgno2bytes(env, meta->geometry.now);
|
||||
if (!dest_is_pipe)
|
||||
return osal_fallocate(fd, whole_size);
|
||||
return osal_fsetsize(fd, whole_size);
|
||||
|
||||
const size_t used_size = pgno2bytes(env, meta->geometry.first_unallocated);
|
||||
memset(data_buffer, 0, (size_t)MDBX_ENVCOPY_WRITEBUF);
|
||||
@@ -648,7 +648,7 @@ retry_snap_meta:
|
||||
/* Extend file if required */
|
||||
if (likely(rc == MDBX_SUCCESS) && whole_size != used_size) {
|
||||
if (!dest_is_pipe)
|
||||
rc = osal_fallocate(fd, whole_size);
|
||||
rc = osal_fsetsize(fd, whole_size);
|
||||
else {
|
||||
memset(data_buffer, 0, (size_t)MDBX_ENVCOPY_WRITEBUF);
|
||||
for (size_t offset = used_size; rc == MDBX_SUCCESS && offset < whole_size;) {
|
||||
|
||||
@@ -241,7 +241,7 @@ __cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, siz
|
||||
return LOG_IFERR(MDBX_BAD_TXN);
|
||||
|
||||
if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) {
|
||||
rc = tbl_fetch((MDBX_txn *)txn, dbi);
|
||||
rc = tbl_refresh((MDBX_txn *)txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
214
src/api-env.c
214
src/api-env.c
@@ -245,7 +245,8 @@ __cold int mdbx_env_create(MDBX_env **penv) {
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
imports.srwl_Init(&env->remap_guard);
|
||||
InitializeCriticalSection(&env->windowsbug_lock);
|
||||
InitializeCriticalSection(&env->lck_event_cs);
|
||||
InitializeCriticalSection(&env->dxb_event_cs);
|
||||
#else
|
||||
rc = osal_fastmutex_init(&env->remap_guard);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
@@ -638,7 +639,8 @@ __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) {
|
||||
ENSURE(env, osal_fastmutex_destroy(&env->dbi_lock) == MDBX_SUCCESS);
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
/* remap_guard don't have destructor (Slim Reader/Writer Lock) */
|
||||
DeleteCriticalSection(&env->windowsbug_lock);
|
||||
DeleteCriticalSection(&env->lck_event_cs);
|
||||
DeleteCriticalSection(&env->dxb_event_cs);
|
||||
#else
|
||||
ENSURE(env, osal_fastmutex_destroy(&env->remap_guard) == MDBX_SUCCESS);
|
||||
#endif /* Windows */
|
||||
@@ -664,11 +666,68 @@ __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) {
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, const size_t bytes,
|
||||
troika_t *const troika) {
|
||||
const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid);
|
||||
const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat);
|
||||
const size_t size_before_dxbid = offsetof(MDBX_envinfo, mi_dxbid);
|
||||
__must_check_result static int env_info_sys(const MDBX_env *env, MDBX_envinfo *out) {
|
||||
out->mi_bootid.current.x = globals.bootid.x;
|
||||
out->mi_bootid.current.y = globals.bootid.y;
|
||||
out->mi_sys_pagesize = globals.sys_pagesize;
|
||||
#ifdef __OpenBSD__
|
||||
out->mi_sys_upcblk = 0;
|
||||
#elif defined(_WIN32) || defined(_WIN64)
|
||||
out->mi_sys_upcblk = globals.sys_allocation_granularity;
|
||||
#elif defined(AT_UCACHEBSIZE)
|
||||
out->mi_sys_upcblk = globals.sys_unified_cache_block;
|
||||
#else
|
||||
out->mi_sys_upcblk = globals.sys_pagesize;
|
||||
#endif /* AT_UCACHEBSIZE */
|
||||
|
||||
out->mi_dxb_fsize = 0;
|
||||
out->mi_dxb_fallocated = 0;
|
||||
out->mi_sys_ioblk = 0;
|
||||
if (env->dxb_mmap.fd != INVALID_HANDLE_VALUE) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
union {
|
||||
BY_HANDLE_FILE_INFORMATION bh;
|
||||
FILE_STANDARD_INFO std;
|
||||
#if _WIN32_WINNT >= _WIN32_WINNT_WIN8
|
||||
FILE_STORAGE_INFO storage;
|
||||
#endif
|
||||
} sys_finfo;
|
||||
if (imports.GetFileInformationByHandleEx &&
|
||||
imports.GetFileInformationByHandleEx(env->dxb_mmap.fd, FileStandardInfo, &sys_finfo.std,
|
||||
sizeof(sys_finfo.std))) {
|
||||
out->mi_dxb_fsize = sys_finfo.std.EndOfFile.QuadPart;
|
||||
out->mi_dxb_fallocated = sys_finfo.std.AllocationSize.QuadPart;
|
||||
#if _WIN32_WINNT >= _WIN32_WINNT_WIN8
|
||||
if (imports.GetFileInformationByHandleEx(env->dxb_mmap.fd, FileStorageInfo, &sys_finfo.storage,
|
||||
sizeof(sys_finfo.storage))) {
|
||||
out->mi_sys_ioblk = (sys_finfo.storage.FileSystemEffectivePhysicalBytesPerSectorForAtomicity >
|
||||
sys_finfo.storage.LogicalBytesPerSector)
|
||||
? sys_finfo.storage.FileSystemEffectivePhysicalBytesPerSectorForAtomicity
|
||||
: sys_finfo.storage.LogicalBytesPerSector;
|
||||
}
|
||||
#endif
|
||||
} else if (GetFileInformationByHandle(env->dxb_mmap.fd, &sys_finfo.bh)) {
|
||||
out->mi_dxb_fsize = sys_finfo.bh.nFileSizeLow | (uint64_t)sys_finfo.bh.nFileSizeHigh << 32;
|
||||
} else
|
||||
return GetLastError();
|
||||
#else
|
||||
struct stat sys_fstat;
|
||||
if (fstat(env->dxb_mmap.fd, &sys_fstat))
|
||||
return errno;
|
||||
out->mi_dxb_fsize = sys_fstat.st_size;
|
||||
out->mi_dxb_fallocated = UINT64_C(512) * sys_fstat.st_blocks;
|
||||
out->mi_sys_ioblk = sys_fstat.st_blksize;
|
||||
#endif /* !Windows */
|
||||
}
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
__must_check_result static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out,
|
||||
troika_t *const troika) {
|
||||
int err = env_info_sys(env, out);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
|
||||
if (unlikely(env->flags & ENV_FATAL_ERROR))
|
||||
return MDBX_PANIC;
|
||||
|
||||
@@ -678,7 +737,6 @@ static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo
|
||||
/* environment not yet opened */
|
||||
#if 1
|
||||
/* default behavior: returns the available info but zeroed the rest */
|
||||
memset(out, 0, bytes);
|
||||
out->mi_geo.lower = env->geo_in_bytes.lower;
|
||||
out->mi_geo.upper = env->geo_in_bytes.upper;
|
||||
out->mi_geo.shrink = env->geo_in_bytes.shrink;
|
||||
@@ -686,11 +744,6 @@ static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo
|
||||
out->mi_geo.current = env->geo_in_bytes.now;
|
||||
out->mi_maxreaders = env->max_readers;
|
||||
out->mi_dxb_pagesize = env->ps;
|
||||
out->mi_sys_pagesize = globals.sys_pagesize;
|
||||
if (likely(bytes > size_before_bootid)) {
|
||||
out->mi_bootid.current.x = globals.bootid.x;
|
||||
out->mi_bootid.current.y = globals.bootid.y;
|
||||
}
|
||||
return MDBX_SUCCESS;
|
||||
#else
|
||||
/* some users may prefer this behavior: return appropriate error */
|
||||
@@ -710,13 +763,10 @@ static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo
|
||||
out->mi_meta_sign[1] = unaligned_peek_u64(4, meta1->sign);
|
||||
out->mi_meta_txnid[2] = troika->txnid[2];
|
||||
out->mi_meta_sign[2] = unaligned_peek_u64(4, meta2->sign);
|
||||
if (likely(bytes > size_before_bootid)) {
|
||||
memcpy(&out->mi_bootid.meta[0], &meta0->bootid, 16);
|
||||
memcpy(&out->mi_bootid.meta[1], &meta1->bootid, 16);
|
||||
memcpy(&out->mi_bootid.meta[2], &meta2->bootid, 16);
|
||||
if (likely(bytes > size_before_dxbid))
|
||||
memcpy(&out->mi_dxbid, &meta0->dxbid, 16);
|
||||
}
|
||||
memcpy(&out->mi_bootid.meta[0], &meta0->bootid, 16);
|
||||
memcpy(&out->mi_bootid.meta[1], &meta1->bootid, 16);
|
||||
memcpy(&out->mi_bootid.meta[2], &meta2->bootid, 16);
|
||||
memcpy(&out->mi_dxbid, &meta0->dxbid, 16);
|
||||
|
||||
const volatile meta_t *txn_meta = head.ptr_v;
|
||||
out->mi_last_pgno = txn_meta->geometry.first_unallocated - 1;
|
||||
@@ -740,44 +790,38 @@ static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo
|
||||
out->mi_maxreaders = env->max_readers;
|
||||
out->mi_numreaders = env->lck_mmap.lck ? atomic_load32(&lck->rdt_length, mo_Relaxed) : INT32_MAX;
|
||||
out->mi_dxb_pagesize = env->ps;
|
||||
out->mi_sys_pagesize = globals.sys_pagesize;
|
||||
|
||||
if (likely(bytes > size_before_bootid)) {
|
||||
const uint64_t unsynced_pages =
|
||||
atomic_load64(&lck->unsynced_pages, mo_Relaxed) +
|
||||
((uint32_t)out->mi_recent_txnid != atomic_load32(&lck->meta_sync_txnid, mo_Relaxed));
|
||||
out->mi_unsync_volume = pgno2bytes(env, (size_t)unsynced_pages);
|
||||
const uint64_t monotime_now = osal_monotime();
|
||||
uint64_t ts = atomic_load64(&lck->eoos_timestamp, mo_Relaxed);
|
||||
out->mi_since_sync_seconds16dot16 = ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0;
|
||||
ts = atomic_load64(&lck->readers_check_timestamp, mo_Relaxed);
|
||||
out->mi_since_reader_check_seconds16dot16 = ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0;
|
||||
out->mi_autosync_threshold = pgno2bytes(env, atomic_load32(&lck->autosync_threshold, mo_Relaxed));
|
||||
out->mi_autosync_period_seconds16dot16 =
|
||||
osal_monotime_to_16dot16_noUnderflow(atomic_load64(&lck->autosync_period, mo_Relaxed));
|
||||
out->mi_bootid.current.x = globals.bootid.x;
|
||||
out->mi_bootid.current.y = globals.bootid.y;
|
||||
out->mi_mode = env->lck_mmap.lck ? lck->envmode.weak : env->flags;
|
||||
}
|
||||
const uint64_t unsynced_pages = atomic_load64(&lck->unsynced_pages, mo_Relaxed) +
|
||||
((uint32_t)out->mi_recent_txnid != atomic_load32(&lck->meta_sync_txnid, mo_Relaxed));
|
||||
out->mi_unsync_volume = pgno2bytes(env, (size_t)unsynced_pages);
|
||||
const uint64_t monotime_now = osal_monotime();
|
||||
uint64_t ts = atomic_load64(&lck->eoos_timestamp, mo_Relaxed);
|
||||
out->mi_since_sync_seconds16dot16 = ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0;
|
||||
ts = atomic_load64(&lck->readers_check_timestamp, mo_Relaxed);
|
||||
out->mi_since_reader_check_seconds16dot16 = ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0;
|
||||
out->mi_autosync_threshold = pgno2bytes(env, atomic_load32(&lck->autosync_threshold, mo_Relaxed));
|
||||
out->mi_autosync_period_seconds16dot16 =
|
||||
osal_monotime_to_16dot16_noUnderflow(atomic_load64(&lck->autosync_period, mo_Relaxed));
|
||||
out->mi_bootid.current.x = globals.bootid.x;
|
||||
out->mi_bootid.current.y = globals.bootid.y;
|
||||
out->mi_mode = env->lck_mmap.lck ? lck->envmode.weak : env->flags;
|
||||
|
||||
if (likely(bytes > size_before_pgop_stat)) {
|
||||
#if MDBX_ENABLE_PGOP_STAT
|
||||
out->mi_pgop_stat.newly = atomic_load64(&lck->pgops.newly, mo_Relaxed);
|
||||
out->mi_pgop_stat.cow = atomic_load64(&lck->pgops.cow, mo_Relaxed);
|
||||
out->mi_pgop_stat.clone = atomic_load64(&lck->pgops.clone, mo_Relaxed);
|
||||
out->mi_pgop_stat.split = atomic_load64(&lck->pgops.split, mo_Relaxed);
|
||||
out->mi_pgop_stat.merge = atomic_load64(&lck->pgops.merge, mo_Relaxed);
|
||||
out->mi_pgop_stat.spill = atomic_load64(&lck->pgops.spill, mo_Relaxed);
|
||||
out->mi_pgop_stat.unspill = atomic_load64(&lck->pgops.unspill, mo_Relaxed);
|
||||
out->mi_pgop_stat.wops = atomic_load64(&lck->pgops.wops, mo_Relaxed);
|
||||
out->mi_pgop_stat.prefault = atomic_load64(&lck->pgops.prefault, mo_Relaxed);
|
||||
out->mi_pgop_stat.mincore = atomic_load64(&lck->pgops.mincore, mo_Relaxed);
|
||||
out->mi_pgop_stat.msync = atomic_load64(&lck->pgops.msync, mo_Relaxed);
|
||||
out->mi_pgop_stat.fsync = atomic_load64(&lck->pgops.fsync, mo_Relaxed);
|
||||
out->mi_pgop_stat.newly = atomic_load64(&lck->pgops.newly, mo_Relaxed);
|
||||
out->mi_pgop_stat.cow = atomic_load64(&lck->pgops.cow, mo_Relaxed);
|
||||
out->mi_pgop_stat.clone = atomic_load64(&lck->pgops.clone, mo_Relaxed);
|
||||
out->mi_pgop_stat.split = atomic_load64(&lck->pgops.split, mo_Relaxed);
|
||||
out->mi_pgop_stat.merge = atomic_load64(&lck->pgops.merge, mo_Relaxed);
|
||||
out->mi_pgop_stat.spill = atomic_load64(&lck->pgops.spill, mo_Relaxed);
|
||||
out->mi_pgop_stat.unspill = atomic_load64(&lck->pgops.unspill, mo_Relaxed);
|
||||
out->mi_pgop_stat.wops = atomic_load64(&lck->pgops.wops, mo_Relaxed);
|
||||
out->mi_pgop_stat.prefault = atomic_load64(&lck->pgops.prefault, mo_Relaxed);
|
||||
out->mi_pgop_stat.mincore = atomic_load64(&lck->pgops.mincore, mo_Relaxed);
|
||||
out->mi_pgop_stat.msync = atomic_load64(&lck->pgops.msync, mo_Relaxed);
|
||||
out->mi_pgop_stat.fsync = atomic_load64(&lck->pgops.fsync, mo_Relaxed);
|
||||
#else
|
||||
memset(&out->mi_pgop_stat, 0, sizeof(out->mi_pgop_stat));
|
||||
memset(&out->mi_pgop_stat, 0, sizeof(out->mi_pgop_stat));
|
||||
#endif /* MDBX_ENABLE_PGOP_STAT*/
|
||||
}
|
||||
|
||||
txnid_t overall_latter_reader_txnid = out->mi_recent_txnid;
|
||||
txnid_t self_latter_reader_txnid = overall_latter_reader_txnid;
|
||||
@@ -800,22 +844,21 @@ static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
__cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, size_t bytes, troika_t *troika) {
|
||||
__cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, troika_t *troika) {
|
||||
MDBX_envinfo snap;
|
||||
int rc = env_info_snap(env, txn, &snap, sizeof(snap), troika);
|
||||
int rc = env_info_snap(env, txn, &snap, troika);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
|
||||
eASSERT(env, sizeof(snap) >= bytes);
|
||||
while (1) {
|
||||
rc = env_info_snap(env, txn, out, bytes, troika);
|
||||
rc = env_info_snap(env, txn, out, troika);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
snap.mi_since_sync_seconds16dot16 = out->mi_since_sync_seconds16dot16;
|
||||
snap.mi_since_reader_check_seconds16dot16 = out->mi_since_reader_check_seconds16dot16;
|
||||
if (likely(memcmp(&snap, out, bytes) == 0))
|
||||
if (likely(memcmp(&snap, out, sizeof(MDBX_envinfo)) == 0))
|
||||
return MDBX_SUCCESS;
|
||||
memcpy(&snap, out, bytes);
|
||||
memcpy(&snap, out, sizeof(MDBX_envinfo));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -823,11 +866,7 @@ __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_envin
|
||||
if (unlikely((env == nullptr && txn == nullptr) || arg == nullptr))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid);
|
||||
const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat);
|
||||
const size_t size_before_dxbid = offsetof(MDBX_envinfo, mi_dxbid);
|
||||
if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && bytes != size_before_pgop_stat &&
|
||||
bytes != size_before_dxbid)
|
||||
if (unlikely(bytes != sizeof(MDBX_envinfo)))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
if (txn) {
|
||||
@@ -846,7 +885,7 @@ __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_envin
|
||||
}
|
||||
|
||||
troika_t troika;
|
||||
return LOG_IFERR(env_info(env, txn, arg, bytes, &troika));
|
||||
return LOG_IFERR(env_info(env, txn, arg, &troika));
|
||||
}
|
||||
|
||||
__cold int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *out, size_t bytes) {
|
||||
@@ -865,27 +904,18 @@ __cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, si
|
||||
if (unlikely(!out))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid);
|
||||
const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat);
|
||||
const size_t size_before_dxbid = offsetof(MDBX_envinfo, mi_dxbid);
|
||||
if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && bytes != size_before_pgop_stat &&
|
||||
bytes != size_before_dxbid)
|
||||
if (unlikely(bytes != sizeof(MDBX_envinfo)))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
memset(out, 0, bytes);
|
||||
if (likely(bytes > size_before_bootid)) {
|
||||
out->mi_bootid.current.x = globals.bootid.x;
|
||||
out->mi_bootid.current.y = globals.bootid.y;
|
||||
}
|
||||
|
||||
MDBX_env env;
|
||||
memset(&env, 0, sizeof(env));
|
||||
env.pid = osal_getpid();
|
||||
if (unlikely(!is_powerof2(globals.sys_pagesize) || globals.sys_pagesize < MDBX_MIN_PAGESIZE)) {
|
||||
ERROR("unsuitable system pagesize %u", globals.sys_pagesize);
|
||||
return LOG_IFERR(MDBX_INCOMPATIBLE);
|
||||
}
|
||||
out->mi_sys_pagesize = globals.sys_pagesize;
|
||||
|
||||
memset(out, 0, bytes);
|
||||
MDBX_env env;
|
||||
memset(&env, 0, sizeof(env));
|
||||
env.pid = osal_getpid();
|
||||
env.flags = MDBX_RDONLY | MDBX_NORDAHEAD | MDBX_ACCEDE | MDBX_VALIDATION;
|
||||
env.stuck_meta = -1;
|
||||
env.lck_mmap.fd = INVALID_HANDLE_VALUE;
|
||||
@@ -894,11 +924,12 @@ __cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, si
|
||||
env.fd4meta = INVALID_HANDLE_VALUE;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
env.dxb_lock_event = INVALID_HANDLE_VALUE;
|
||||
env.lck_lock_event = INVALID_HANDLE_VALUE;
|
||||
env.ioring.overlapped_fd = INVALID_HANDLE_VALUE;
|
||||
#endif /* Windows */
|
||||
env_options_init(&env);
|
||||
|
||||
int rc = env_handle_pathname(&env, pathname, 0);
|
||||
int err, rc = env_handle_pathname(&env, pathname, 0);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
rc = osal_openfile(MDBX_OPEN_DXB_READ, &env, env.pathname.dxb, &env.lazy_fd, 0);
|
||||
@@ -918,17 +949,17 @@ __cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, si
|
||||
out->mi_geo.current = pgno2bytes(&env, header.geometry.now);
|
||||
out->mi_last_pgno = header.geometry.first_unallocated - 1;
|
||||
|
||||
const unsigned n = 0;
|
||||
out->mi_recent_txnid = constmeta_txnid(&header);
|
||||
const unsigned n = 0;
|
||||
out->mi_meta_sign[n] = unaligned_peek_u64(4, &header.sign);
|
||||
if (likely(bytes > size_before_bootid)) {
|
||||
memcpy(&out->mi_bootid.meta[n], &header.bootid, 16);
|
||||
if (likely(bytes > size_before_dxbid))
|
||||
memcpy(&out->mi_dxbid, &header.dxbid, 16);
|
||||
}
|
||||
memcpy(&out->mi_bootid.meta[n], &header.bootid, 16);
|
||||
memcpy(&out->mi_dxbid, &header.dxbid, 16);
|
||||
|
||||
bailout:
|
||||
env_close(&env, false);
|
||||
err = env_info_sys(&env, out);
|
||||
rc = rc ? rc : err;
|
||||
err = env_close(&env, false);
|
||||
rc = rc ? rc : err;
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
@@ -1100,13 +1131,16 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t si
|
||||
}
|
||||
|
||||
const size_t unit_ps = (globals.sys_pagesize > (size_t)pagesize) ? globals.sys_pagesize : (size_t)pagesize;
|
||||
const size_t unit_ag = (globals.sys_allocation_granularity > unit_ps) ? globals.sys_allocation_granularity : unit_ps;
|
||||
const size_t unit_ag = (globals.sys_allocation_granularity > unit_ps &&
|
||||
(growth_step < 0 || (size_t)growth_step >= globals.sys_allocation_granularity))
|
||||
? globals.sys_allocation_granularity
|
||||
: unit_ps;
|
||||
size_lower = ceil_powerof2(size_lower, unit_ps);
|
||||
size_upper = ceil_powerof2(size_upper, unit_ag);
|
||||
size_now = ceil_powerof2(size_now, unit_ag);
|
||||
|
||||
/* LY: подбираем значение size_upper:
|
||||
* - кратное размеру страницы
|
||||
* - кратное размеру unit_ag (размеру страницы БД и системному размеру выделения)
|
||||
* - без нарушения MAX_MAPSIZE и MAX_PAGENO */
|
||||
while (unlikely((size_t)size_upper > MAX_MAPSIZE || (uint64_t)size_upper / pagesize > MAX_PAGENO + 1)) {
|
||||
if ((size_t)size_upper < unit_ag + MIN_MAPSIZE || (size_t)size_upper < (size_t)pagesize * (MIN_PAGENO + 1)) {
|
||||
|
||||
@@ -163,3 +163,29 @@ int mdbx_txn_unlock(MDBX_env *env) {
|
||||
lck_txn_unlock(env);
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
/*------------------------------------------------------------------------------
|
||||
* Auxiliary */
|
||||
|
||||
__cold const char *mdbx_ratio2digits(uint64_t numerator, uint64_t denominator, int precision, char *buffer,
|
||||
size_t buffer_size) {
|
||||
if (!buffer)
|
||||
return "nullptr";
|
||||
else if (buffer_size < sizeof(ratio2digits_buffer_t))
|
||||
return "buffer-to-small";
|
||||
else if (!denominator)
|
||||
return numerator ? "infinity" : "undefined";
|
||||
else
|
||||
return ratio2digits(numerator, denominator, (ratio2digits_buffer_t *)buffer, precision);
|
||||
}
|
||||
|
||||
__cold const char *mdbx_ratio2percents(uint64_t value, uint64_t whole, char *buffer, size_t buffer_size) {
|
||||
if (!buffer)
|
||||
return "nullptr";
|
||||
else if (buffer_size < sizeof(ratio2digits_buffer_t))
|
||||
return "buffer-to-small";
|
||||
else if (!whole)
|
||||
return value ? "infinity" : "undefined";
|
||||
else
|
||||
return ratio2percent(value, whole, (ratio2digits_buffer_t *)buffer);
|
||||
}
|
||||
|
||||
@@ -124,7 +124,7 @@ uint64_t mdbx_key_from_jsonInteger(const int64_t json_integer) {
|
||||
const uint64_t exponent = (uint64_t)IEEE754_DOUBLE_EXPONENTA_BIAS + IEEE754_DOUBLE_MANTISSA_SIZE - shift;
|
||||
assert(exponent > 0 && exponent <= IEEE754_DOUBLE_EXPONENTA_MAX);
|
||||
const uint64_t key = bias + (exponent << IEEE754_DOUBLE_MANTISSA_SIZE) + (mantissa - IEEE754_DOUBLE_IMPLICIT_LEAD);
|
||||
#if !defined(_MSC_VER) || defined(_DEBUG) /* Workaround for MSVC error LNK2019: unresolved external \
|
||||
#if !defined(_MSC_VER) || !MDBX_WITHOUT_MSVC_CRT /* Workaround for MSVC error LNK2019: unresolved external \
|
||||
symbol __except1 referenced in function __ftol3_except */
|
||||
assert(key == mdbx_key_from_double((double)json_integer));
|
||||
#endif /* Workaround for MSVC */
|
||||
@@ -146,7 +146,7 @@ uint64_t mdbx_key_from_jsonInteger(const int64_t json_integer) {
|
||||
assert(exponent > 0 && exponent <= IEEE754_DOUBLE_EXPONENTA_MAX);
|
||||
const uint64_t key =
|
||||
bias - 1 - (exponent << IEEE754_DOUBLE_MANTISSA_SIZE) - (mantissa - IEEE754_DOUBLE_IMPLICIT_LEAD);
|
||||
#if !defined(_MSC_VER) || defined(_DEBUG) /* Workaround for MSVC error LNK2019: unresolved external \
|
||||
#if !defined(_MSC_VER) || !MDBX_WITHOUT_MSVC_CRT /* Workaround for MSVC error LNK2019: unresolved external \
|
||||
symbol __except1 referenced in function __ftol3_except */
|
||||
assert(key == mdbx_key_from_double((double)json_integer));
|
||||
#endif /* Workaround for MSVC */
|
||||
|
||||
@@ -37,7 +37,7 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, uint64_t in
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) {
|
||||
rc = tbl_fetch(txn, dbi);
|
||||
rc = tbl_refresh_absent2baddbi(txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
@@ -146,8 +146,7 @@ __cold const char *mdbx_liberr2str(int errnum) {
|
||||
" or Operation system not supported such operations",
|
||||
"MDBX_INCOMPATIBLE: Environment or database is not compatible"
|
||||
" with the requested operation or the specified flags",
|
||||
"MDBX_BAD_RSLOT: Invalid reuse of reader locktable slot,"
|
||||
" e.g. read-transaction already run for current thread",
|
||||
"MDBX_BAD_RSLOT: Reader locktable slot was unexpectly reused or cleared by an enemy thread",
|
||||
"MDBX_BAD_TXN: Transaction is not valid for requested operation,"
|
||||
" e.g. had errored and be must aborted, has a child, or is invalid",
|
||||
"MDBX_BAD_VALSIZE: Invalid size or alignment of key or data"
|
||||
|
||||
@@ -7,8 +7,7 @@
|
||||
/* LY: avoid tsan-trap by txn, mm_last_pg and geo.first_unallocated */
|
||||
__attribute__((__no_sanitize_thread__, __noinline__))
|
||||
#endif
|
||||
int mdbx_txn_straggler(const MDBX_txn *txn, int *percent)
|
||||
{
|
||||
int mdbx_txn_straggler(const MDBX_txn *txn, int *percent) {
|
||||
int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_PARKED);
|
||||
if (likely(rc == MDBX_SUCCESS))
|
||||
rc = check_env(txn->env, true);
|
||||
|
||||
@@ -7,6 +7,33 @@
|
||||
|
||||
#ifndef __cplusplus
|
||||
|
||||
MDBX_MAYBE_UNUSED static __always_inline void atomic_yield(void) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
YieldProcessor();
|
||||
#elif defined(__ia32__) || defined(__e2k__)
|
||||
__builtin_ia32_pause();
|
||||
#elif defined(__ia64__)
|
||||
#if defined(__HP_cc__) || defined(__HP_aCC__)
|
||||
_Asm_hint(_HINT_PAUSE);
|
||||
#else
|
||||
__asm__ __volatile__("hint @pause");
|
||||
#endif
|
||||
#elif defined(__aarch64__) || (defined(__ARM_ARCH) && __ARM_ARCH > 6) || defined(__ARM_ARCH_6K__)
|
||||
#ifdef __CC_ARM
|
||||
__yield();
|
||||
#else
|
||||
__asm__ __volatile__("yield");
|
||||
#endif
|
||||
#elif (defined(__mips64) || defined(__mips64__)) && defined(__mips_isa_rev) && __mips_isa_rev >= 2
|
||||
__asm__ __volatile__("pause");
|
||||
#elif defined(__mips) || defined(__mips__) || defined(__mips64) || defined(__mips64__) || defined(_M_MRX000) || \
|
||||
defined(_MIPS_) || defined(__MWERKS__) || defined(__sgi)
|
||||
__asm__ __volatile__(".word 0x00000140");
|
||||
#else
|
||||
osal_yield();
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef MDBX_HAVE_C11ATOMICS
|
||||
#define osal_memory_fence(order, write) atomic_thread_fence((write) ? mo_c11_store(order) : mo_c11_load(order))
|
||||
#else /* MDBX_HAVE_C11ATOMICS */
|
||||
@@ -115,8 +142,7 @@ MDBX_MAYBE_UNUSED static
|
||||
#if MDBX_64BIT_ATOMIC
|
||||
__always_inline
|
||||
#endif /* MDBX_64BIT_ATOMIC */
|
||||
uint64_t
|
||||
atomic_load64(const volatile mdbx_atomic_uint64_t *p, enum mdbx_memory_order order) {
|
||||
uint64_t atomic_load64(const volatile mdbx_atomic_uint64_t *p, enum mdbx_memory_order order) {
|
||||
STATIC_ASSERT(sizeof(mdbx_atomic_uint64_t) == 8);
|
||||
#if MDBX_64BIT_ATOMIC
|
||||
#ifdef MDBX_HAVE_C11ATOMICS
|
||||
@@ -144,38 +170,12 @@ MDBX_MAYBE_UNUSED static
|
||||
if (likely(value == again))
|
||||
return value;
|
||||
value = again;
|
||||
atomic_yield();
|
||||
}
|
||||
#endif /* !MDBX_64BIT_ATOMIC */
|
||||
}
|
||||
#endif /* atomic_load64 */
|
||||
|
||||
MDBX_MAYBE_UNUSED static __always_inline void atomic_yield(void) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
YieldProcessor();
|
||||
#elif defined(__ia32__) || defined(__e2k__)
|
||||
__builtin_ia32_pause();
|
||||
#elif defined(__ia64__)
|
||||
#if defined(__HP_cc__) || defined(__HP_aCC__)
|
||||
_Asm_hint(_HINT_PAUSE);
|
||||
#else
|
||||
__asm__ __volatile__("hint @pause");
|
||||
#endif
|
||||
#elif defined(__aarch64__) || (defined(__ARM_ARCH) && __ARM_ARCH > 6) || defined(__ARM_ARCH_6K__)
|
||||
#ifdef __CC_ARM
|
||||
__yield();
|
||||
#else
|
||||
__asm__ __volatile__("yield");
|
||||
#endif
|
||||
#elif (defined(__mips64) || defined(__mips64__)) && defined(__mips_isa_rev) && __mips_isa_rev >= 2
|
||||
__asm__ __volatile__("pause");
|
||||
#elif defined(__mips) || defined(__mips__) || defined(__mips64) || defined(__mips64__) || defined(_M_MRX000) || \
|
||||
defined(_MIPS_) || defined(__MWERKS__) || defined(__sgi)
|
||||
__asm__ __volatile__(".word 0x00000140");
|
||||
#else
|
||||
osal_yield();
|
||||
#endif
|
||||
}
|
||||
|
||||
#if MDBX_64BIT_CAS
|
||||
MDBX_MAYBE_UNUSED static __always_inline bool atomic_cas64(mdbx_atomic_uint64_t *p, uint64_t c, uint64_t v) {
|
||||
#ifdef MDBX_HAVE_C11ATOMICS
|
||||
@@ -312,10 +312,11 @@ MDBX_MAYBE_UNUSED static __always_inline void safe64_write(mdbx_atomic_uint64_t
|
||||
|
||||
MDBX_MAYBE_UNUSED static __always_inline uint64_t safe64_read(const mdbx_atomic_uint64_t *p) {
|
||||
jitter4testing(true);
|
||||
uint64_t v;
|
||||
do
|
||||
uint64_t v = atomic_load64(p, mo_AcquireRelease);
|
||||
while (!MDBX_64BIT_ATOMIC && unlikely(v != p->weak)) {
|
||||
atomic_yield();
|
||||
v = atomic_load64(p, mo_AcquireRelease);
|
||||
while (!MDBX_64BIT_ATOMIC && unlikely(v != p->weak));
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
@@ -353,8 +354,7 @@ MDBX_MAYBE_UNUSED static
|
||||
#if MDBX_64BIT_ATOMIC
|
||||
__always_inline
|
||||
#endif /* MDBX_64BIT_ATOMIC */
|
||||
void
|
||||
safe64_inc(mdbx_atomic_uint64_t *p, const uint64_t v) {
|
||||
void safe64_inc(mdbx_atomic_uint64_t *p, const uint64_t v) {
|
||||
assert(v > 0);
|
||||
safe64_update(p, safe64_read(p) + v);
|
||||
}
|
||||
|
||||
320
src/chk.c
320
src/chk.c
@@ -196,6 +196,49 @@ __cold static MDBX_chk_line_t *chk_print_size(MDBX_chk_line_t *line, const char
|
||||
return line;
|
||||
}
|
||||
|
||||
__cold static MDBX_chk_line_t *chk_print_ratio(MDBX_chk_line_t *line, size_t numerator, size_t denominator,
|
||||
unsigned precision) {
|
||||
if (line) {
|
||||
ratio2digits_buffer_t buffer;
|
||||
line = chk_puts(line, ratio2digits(numerator, denominator, &buffer, precision));
|
||||
}
|
||||
return line;
|
||||
}
|
||||
|
||||
__cold static MDBX_chk_line_t *chk_print_percent(MDBX_chk_line_t *line, const char *triplet, size_t value, size_t whole,
|
||||
const char *unit) {
|
||||
if (line) {
|
||||
const char *s1 = triplet;
|
||||
const char *s2 = s1 + strlen(s1) + 1;
|
||||
const char *s3 = s2 + strlen(s2) + 1;
|
||||
ratio2digits_buffer_t buffer;
|
||||
line = chk_print(line, "%s %" PRIuSIZE "%s%s (%s%%%s)%s", s1, value, unit, &"s"[*unit == 0 || value == 1],
|
||||
ratio2percent(value, whole, &buffer), s2, s3);
|
||||
}
|
||||
return line;
|
||||
}
|
||||
|
||||
__cold static MDBX_chk_line_t *chk_print_pages_percent(MDBX_chk_line_t *line, const char *triplet, size_t pages,
|
||||
size_t whole) {
|
||||
return chk_print_percent(line, triplet, pages, whole, " page");
|
||||
}
|
||||
|
||||
__cold static MDBX_chk_line_t *chk_print_bytes_percent(MDBX_chk_line_t *line, const char *triplet, size_t pages,
|
||||
size_t whole) {
|
||||
return chk_print_percent(line, triplet, pages, whole, " byte");
|
||||
}
|
||||
|
||||
__cold static MDBX_chk_line_t *chk_print_pages_percent_bb(MDBX_chk_line_t *line, const char *prefix, size_t pages,
|
||||
size_t backed, size_t boundary) {
|
||||
if (line) {
|
||||
ratio2digits_buffer_t buffer_backed, buffer_boundary;
|
||||
line =
|
||||
chk_print(line, "%s %" PRIuSIZE " page%s (%s%% of backed, %s%% of boundary)", prefix, pages, &"s"[pages == 1],
|
||||
ratio2percent(pages, backed, &buffer_backed), ratio2percent(pages, boundary, &buffer_boundary));
|
||||
}
|
||||
return line;
|
||||
}
|
||||
|
||||
__cold static int chk_error_rc(MDBX_chk_scope_t *const scope, int err, const char *subj) {
|
||||
MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error);
|
||||
if (line)
|
||||
@@ -568,11 +611,23 @@ static void histogram_acc(const size_t n, struct MDBX_chk_histogram *p) {
|
||||
|
||||
__cold static MDBX_chk_line_t *histogram_dist(MDBX_chk_line_t *line, const struct MDBX_chk_histogram *histogram,
|
||||
const char *prefix, const char *first, bool amount) {
|
||||
/* https://en.wikipedia.org/wiki/Multiplication_sign */
|
||||
#if defined(unix) || defined(linux) || defined(__unix__) || defined(__unix) || defined(__linux__) || \
|
||||
defined(__APPLE__) || defined(__MACH__) || defined(_DARWIN_C_SOURCE)
|
||||
#define UNICODE_MULSIGN_STR "×"
|
||||
#define UNICODE_MULSIGN_FMT "s"
|
||||
#elif defined(_WIN32) || defined(_WIN64)
|
||||
#define UNICODE_MULSIGN_STR L"\u00d7"
|
||||
#define UNICODE_MULSIGN_FMT "ls"
|
||||
#else
|
||||
#define UNICODE_MULSIGN_STR "*"
|
||||
#define UNICODE_MULSIGN_FMT "s"
|
||||
#endif
|
||||
line = chk_print(line, "%s:", prefix);
|
||||
const char *comma = "";
|
||||
const size_t first_val = amount ? histogram->ones : histogram->pad;
|
||||
if (first_val) {
|
||||
chk_print(line, " %s=%" PRIuSIZE, first, first_val);
|
||||
chk_print(line, " %s%" UNICODE_MULSIGN_FMT "%" PRIuSIZE, first, UNICODE_MULSIGN_STR, first_val);
|
||||
comma = ",";
|
||||
}
|
||||
for (size_t n = 0; n < ARRAY_LENGTH(histogram->ranges); ++n)
|
||||
@@ -580,7 +635,8 @@ __cold static MDBX_chk_line_t *histogram_dist(MDBX_chk_line_t *line, const struc
|
||||
chk_print(line, "%s %" PRIuSIZE, comma, histogram->ranges[n].begin);
|
||||
if (histogram->ranges[n].begin != histogram->ranges[n].end - 1)
|
||||
chk_print(line, "-%" PRIuSIZE, histogram->ranges[n].end - 1);
|
||||
line = chk_print(line, "=%" PRIuSIZE, amount ? histogram->ranges[n].amount : histogram->ranges[n].count);
|
||||
line = chk_print(line, "%" UNICODE_MULSIGN_FMT "%" PRIuSIZE, UNICODE_MULSIGN_STR,
|
||||
amount ? histogram->ranges[n].amount : histogram->ranges[n].count);
|
||||
comma = ",";
|
||||
}
|
||||
return line;
|
||||
@@ -668,7 +724,7 @@ __cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, const unsigne
|
||||
line = chk_print(line, " txn#%" PRIaTXN ", ", meta_txnid);
|
||||
if (chk->envinfo.mi_bootid.meta[num].x | chk->envinfo.mi_bootid.meta[num].y)
|
||||
line = chk_print(line, "boot-id %" PRIx64 "-%" PRIx64 " (%s)", chk->envinfo.mi_bootid.meta[num].x,
|
||||
chk->envinfo.mi_bootid.meta[num].y, bootid_match ? "live" : "not match");
|
||||
chk->envinfo.mi_bootid.meta[num].y, bootid_match ? "live" : "dissimilar");
|
||||
else
|
||||
line = chk_puts(line, "no boot-id");
|
||||
|
||||
@@ -701,7 +757,8 @@ __cold static int chk_pgvisitor(const size_t pgno, const unsigned npages, void *
|
||||
chk_scope_issue(scope, "too deeply %u, page %zu, parent %zu", deep, pgno, parent_pgno);
|
||||
return MDBX_CORRUPTED /* avoid infinite loop/recursion */;
|
||||
}
|
||||
histogram_acc(deep, &tbl->histogram.deep);
|
||||
if (pagetype != page_large)
|
||||
histogram_acc(deep, &tbl->histogram.height);
|
||||
usr->result.processed_pages += npages;
|
||||
const size_t page_bytes = payload_bytes + header_bytes + unused_bytes;
|
||||
|
||||
@@ -722,27 +779,28 @@ __cold static int chk_pgvisitor(const size_t pgno, const unsigned npages, void *
|
||||
|
||||
const char *pagetype_caption;
|
||||
bool branch = false;
|
||||
struct MDBX_chk_histogram *filling = nullptr;
|
||||
struct MDBX_chk_histogram *density = nullptr;
|
||||
switch (pagetype) {
|
||||
default:
|
||||
chk_object_issue(scope, "page", pgno, "unknown page-type", "type %u, deep %i, parent %zu", (unsigned)pagetype, deep,
|
||||
parent_pgno);
|
||||
pagetype_caption = "unknown";
|
||||
tbl->pages.other += npages;
|
||||
tbl->pages.broken += npages;
|
||||
break;
|
||||
case page_broken:
|
||||
assert(page_err != MDBX_SUCCESS);
|
||||
pagetype_caption = "broken";
|
||||
tbl->pages.other += npages;
|
||||
tbl->pages.broken += npages;
|
||||
break;
|
||||
case page_sub_broken:
|
||||
assert(page_err != MDBX_SUCCESS);
|
||||
pagetype_caption = "broken-subpage";
|
||||
tbl->pages.other += npages;
|
||||
tbl->pages.broken += npages;
|
||||
break;
|
||||
case page_large:
|
||||
pagetype_caption = "large";
|
||||
histogram_acc(npages, &tbl->histogram.large_pages);
|
||||
density = &tbl->histogram.large_or_nested_density;
|
||||
if (tbl->flags & MDBX_DUPSORT)
|
||||
chk_object_issue(scope, "page", pgno, "unexpected", "type %u, table %s flags 0x%x, deep %i, parent %zu",
|
||||
(unsigned)pagetype, chk_v2a(chk, &tbl->name), tbl->flags, deep, parent_pgno);
|
||||
@@ -752,11 +810,11 @@ __cold static int chk_pgvisitor(const size_t pgno, const unsigned npages, void *
|
||||
if (!nested) {
|
||||
pagetype_caption = "branch";
|
||||
tbl->pages.branch += 1;
|
||||
filling = &tbl->histogram.tree_filling;
|
||||
density = &tbl->histogram.tree_density;
|
||||
} else {
|
||||
pagetype_caption = "nested-branch";
|
||||
tbl->pages.nested_branch += 1;
|
||||
filling = &tbl->histogram.nested_tree_filling;
|
||||
density = &tbl->histogram.large_or_nested_density;
|
||||
}
|
||||
break;
|
||||
case page_dupfix_leaf:
|
||||
@@ -769,16 +827,16 @@ __cold static int chk_pgvisitor(const size_t pgno, const unsigned npages, void *
|
||||
if (!nested) {
|
||||
pagetype_caption = "leaf";
|
||||
tbl->pages.leaf += 1;
|
||||
filling = &tbl->histogram.tree_filling;
|
||||
density = &tbl->histogram.tree_density;
|
||||
if (height != tbl_info->internal->height)
|
||||
chk_object_issue(scope, "page", pgno, "wrong tree height", "actual %i != %i table %s, parent %zu", height,
|
||||
tbl_info->internal->height, chk_v2a(chk, &tbl->name), parent_pgno);
|
||||
} else {
|
||||
pagetype_caption = (pagetype == page_leaf) ? "nested-leaf" : "nested-leaf-dupfix";
|
||||
tbl->pages.nested_leaf += 1;
|
||||
filling = &tbl->histogram.nested_tree_filling;
|
||||
density = &tbl->histogram.large_or_nested_density;
|
||||
if (chk->last_nested != nested) {
|
||||
histogram_acc(height, &tbl->histogram.nested_tree);
|
||||
histogram_acc(height, &tbl->histogram.nested_height);
|
||||
chk->last_nested = nested;
|
||||
}
|
||||
if (height != nested->height)
|
||||
@@ -794,12 +852,12 @@ __cold static int chk_pgvisitor(const size_t pgno, const unsigned npages, void *
|
||||
chk_object_issue(scope, "page", pgno, "unexpected", "type %u, table %s flags 0x%x, deep %i, parent %zu",
|
||||
(unsigned)pagetype, chk_v2a(chk, &tbl->name), tbl->flags, deep, parent_pgno);
|
||||
else
|
||||
filling = &tbl->histogram.nested_tree_filling;
|
||||
density = &tbl->histogram.large_or_nested_density;
|
||||
break;
|
||||
}
|
||||
|
||||
if (filling)
|
||||
histogram_acc((page_size - unused_bytes) * 100 / page_size, filling);
|
||||
if (density)
|
||||
histogram_acc((page_size - unused_bytes) * 100 / page_size, density);
|
||||
|
||||
if (npages) {
|
||||
if (tbl->cookie) {
|
||||
@@ -914,7 +972,7 @@ __cold static int chk_tree(MDBX_chk_scope_t *const scope) {
|
||||
total.lost_bytes += tbl->lost_bytes;
|
||||
total.pages.all += tbl->pages.all;
|
||||
total.pages.empty += tbl->pages.empty;
|
||||
total.pages.other += tbl->pages.other;
|
||||
total.pages.broken += tbl->pages.broken;
|
||||
total.pages.branch += tbl->pages.branch;
|
||||
total.pages.leaf += tbl->pages.leaf;
|
||||
total.pages.nested_branch += tbl->pages.nested_branch;
|
||||
@@ -934,53 +992,55 @@ __cold static int chk_tree(MDBX_chk_scope_t *const scope) {
|
||||
if (scope->verbosity > MDBX_chk_info) {
|
||||
for (size_t i = 0; i < ARRAY_LENGTH(chk->table) && chk->table[i]; ++i) {
|
||||
MDBX_chk_table_t *const tbl = chk->table[i];
|
||||
MDBX_chk_scope_t *inner = chk_scope_push(scope, 0, "tree %s:", chk_v2a(chk, &tbl->name));
|
||||
if (tbl->pages.all == 0)
|
||||
chk_line_end(chk_print(chk_line_begin(inner, MDBX_chk_resolution), "empty"));
|
||||
else {
|
||||
MDBX_chk_scope_t *inner =
|
||||
chk_scope_push(scope, 0, (tbl->pages.all ? "b-tree %s, subtotal %" PRIuSIZE " pages:" : "b-tree %s: empty"),
|
||||
chk_v2a(chk, &tbl->name), tbl->pages.all);
|
||||
if (tbl->pages.all) {
|
||||
MDBX_chk_line_t *line = chk_line_begin(inner, MDBX_chk_info);
|
||||
if (line) {
|
||||
line = chk_print(line, "page usage: subtotal %" PRIuSIZE, tbl->pages.all);
|
||||
const size_t branch_pages = tbl->pages.branch + tbl->pages.nested_branch;
|
||||
const size_t leaf_pages = tbl->pages.leaf + tbl->pages.nested_leaf + tbl->pages.nested_subleaf;
|
||||
if (tbl->pages.other)
|
||||
line = chk_print(line, ", other %" PRIuSIZE, tbl->pages.other);
|
||||
if (tbl->pages.other == 0 || (branch_pages | leaf_pages | tbl->histogram.large_pages.count) != 0) {
|
||||
line = chk_print(line, ", branch %" PRIuSIZE ", leaf %" PRIuSIZE, branch_pages, leaf_pages);
|
||||
line = chk_puts(line, "pages composition: ");
|
||||
if (tbl->pages.broken)
|
||||
line = chk_print(line, "broken %" PRIuSIZE ", ", tbl->pages.broken);
|
||||
if (tbl->pages.broken != tbl->pages.all) {
|
||||
line = chk_print(line, "branch %" PRIuSIZE ", leaf %" PRIuSIZE, tbl->pages.branch, tbl->pages.leaf);
|
||||
if (tbl->pages.nested_subleaf || (tbl->flags & MDBX_DUPSORT) != 0)
|
||||
line = chk_print(line, ", subleaf %" PRIuSIZE, tbl->pages.nested_subleaf);
|
||||
if (tbl->pages.nested_branch || (tbl->flags & MDBX_DUPSORT) != 0)
|
||||
line = chk_print(line, ", nested-branch %" PRIuSIZE, tbl->pages.nested_branch);
|
||||
if (tbl->pages.nested_leaf || (tbl->flags & MDBX_DUPSORT) != 0)
|
||||
line = chk_print(line, ", nested-leaf %" PRIuSIZE, tbl->pages.nested_leaf);
|
||||
if (tbl->histogram.large_pages.count || (tbl->flags & MDBX_DUPSORT) == 0) {
|
||||
line = chk_print(line, ", large %" PRIuSIZE, tbl->histogram.large_pages.count);
|
||||
if (tbl->histogram.large_pages.amount | tbl->histogram.large_pages.count)
|
||||
line = histogram_print(inner, line, &tbl->histogram.large_pages, " amount", "single", true);
|
||||
}
|
||||
}
|
||||
line = histogram_dist(chk_line_feed(line), &tbl->histogram.deep, "tree deep density", "1", false);
|
||||
if (tbl != &chk->table_gc && tbl->histogram.nested_tree.count) {
|
||||
line = chk_print(chk_line_feed(line), "nested tree(s) %" PRIuSIZE, tbl->histogram.nested_tree.count);
|
||||
line = histogram_dist(line, &tbl->histogram.nested_tree, " density", "1", false);
|
||||
line = chk_print(chk_line_feed(line),
|
||||
"nested tree(s) pages %" PRIuSIZE ": branch %" PRIuSIZE ", leaf %" PRIuSIZE
|
||||
", subleaf %" PRIuSIZE,
|
||||
tbl->pages.nested_branch + tbl->pages.nested_leaf, tbl->pages.nested_branch,
|
||||
tbl->pages.nested_leaf, tbl->pages.nested_subleaf);
|
||||
}
|
||||
line = histogram_dist(chk_line_feed(line), &tbl->histogram.height, "tree levels", "1", false);
|
||||
if ((tbl->flags & MDBX_DUPSORT) != 0 || (tbl->histogram.nested_height.count && tbl != &chk->table_gc)) {
|
||||
line = chk_print(chk_line_feed(line),
|
||||
"nested tree(s): quantity %" PRIuSIZE ", subtotal pages %" PRIuSIZE ", ",
|
||||
tbl->histogram.nested_height.count, tbl->pages.nested_branch + tbl->pages.nested_leaf);
|
||||
if (tbl != &chk->table_gc && tbl->histogram.nested_height.count)
|
||||
line = histogram_dist(line, &tbl->histogram.nested_height, "levels", "1", false);
|
||||
}
|
||||
line = chk_line_feed(line);
|
||||
|
||||
const size_t bytes = pgno2bytes(env, tbl->pages.all);
|
||||
line =
|
||||
chk_print(chk_line_feed(line),
|
||||
"page filling: subtotal %" PRIuSIZE " bytes (%.1f%%), payload %" PRIuSIZE
|
||||
" (%.1f%%), unused %" PRIuSIZE " (%.1f%%)",
|
||||
bytes, bytes * 100.0 / total_page_bytes, tbl->payload_bytes, tbl->payload_bytes * 100.0 / bytes,
|
||||
bytes - tbl->payload_bytes, (bytes - tbl->payload_bytes) * 100.0 / bytes);
|
||||
if (tbl->pages.empty)
|
||||
line = chk_print(line, ", %" PRIuSIZE " empty pages", tbl->pages.empty);
|
||||
if (tbl->lost_bytes)
|
||||
line = chk_print(line, ", %" PRIuSIZE " bytes lost", tbl->lost_bytes);
|
||||
const size_t bytes = pgno2bytes(env, tbl->pages.all);
|
||||
line = chk_print_bytes_percent(line, "pages density: subtotal\0\0", bytes, total_page_bytes);
|
||||
line = chk_print_percent(line, ", payload\0\0", tbl->payload_bytes, bytes, "");
|
||||
line = chk_print_percent(line, ", unused\0\0", bytes - tbl->payload_bytes, bytes, "");
|
||||
if (tbl->pages.empty)
|
||||
line = chk_print(line, ", %" PRIuSIZE " empty pages", tbl->pages.empty);
|
||||
if (tbl->lost_bytes)
|
||||
line = chk_print(line, ", %" PRIuSIZE " bytes lost", tbl->lost_bytes);
|
||||
|
||||
line =
|
||||
histogram_dist(chk_line_feed(line), &tbl->histogram.tree_filling, "tree %-filling density", "1", false);
|
||||
if (tbl->histogram.nested_tree_filling.count)
|
||||
line = histogram_dist(chk_line_feed(line), &tbl->histogram.nested_tree_filling,
|
||||
"nested tree(s) %-filling density", "1", false);
|
||||
line = histogram_dist(chk_line_feed(line), &tbl->histogram.tree_density, "pages %-density distribution",
|
||||
"1", false);
|
||||
if (tbl->histogram.large_or_nested_density.count)
|
||||
line = histogram_dist(chk_line_feed(line), &tbl->histogram.large_or_nested_density,
|
||||
(tbl->flags & MDBX_DUPSORT) ? "nested %-density distribution"
|
||||
: "large pages %-density distribution",
|
||||
"1", false);
|
||||
}
|
||||
chk_line_end(line);
|
||||
}
|
||||
}
|
||||
@@ -989,14 +1049,11 @@ __cold static int chk_tree(MDBX_chk_scope_t *const scope) {
|
||||
}
|
||||
|
||||
MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution);
|
||||
line = chk_print(line,
|
||||
"summary: total %" PRIuSIZE " bytes, payload %" PRIuSIZE " (%.1f%%), unused %" PRIuSIZE " (%.1f%%),"
|
||||
" average fill %.1f%%",
|
||||
total_page_bytes, usr->result.total_payload_bytes,
|
||||
usr->result.total_payload_bytes * 100.0 / total_page_bytes,
|
||||
total_page_bytes - usr->result.total_payload_bytes,
|
||||
(total_page_bytes - usr->result.total_payload_bytes) * 100.0 / total_page_bytes,
|
||||
usr->result.total_payload_bytes * 100.0 / total_page_bytes);
|
||||
line = chk_print(line, "summary: total %" PRIuSIZE " bytes", total_page_bytes);
|
||||
line =
|
||||
chk_print_percent(line, ", payload\0 average density\0", usr->result.total_payload_bytes, total_page_bytes, "");
|
||||
line = chk_print_percent(line, ", unused\0 average sparsity\0", total_page_bytes - usr->result.total_payload_bytes,
|
||||
total_page_bytes, "");
|
||||
if (total.pages.empty)
|
||||
line = chk_print(line, ", %" PRIuSIZE " empty pages", total.pages.empty);
|
||||
if (total.lost_bytes)
|
||||
@@ -1285,15 +1342,16 @@ bailout:
|
||||
if (handler) {
|
||||
if (record_count) {
|
||||
MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info);
|
||||
line = histogram_dist(line, &tbl->histogram.key_len, "key length density", "0/1", false);
|
||||
line = histogram_dist(line, &tbl->histogram.key_len, "key length distribution", "0/1", false);
|
||||
chk_line_feed(line);
|
||||
line = histogram_dist(line, &tbl->histogram.val_len, "value length density", "0/1", false);
|
||||
line = histogram_dist(line, &tbl->histogram.val_len, "value length distribution", "0/1", false);
|
||||
if (tbl->histogram.multival.amount) {
|
||||
chk_line_feed(line);
|
||||
line = histogram_dist(line, &tbl->histogram.multival, "number of multi-values density", "single", false);
|
||||
line = histogram_dist(line, &tbl->histogram.multival, "number of multi-values distribution", "single", false);
|
||||
chk_line_feed(line);
|
||||
line = chk_print(line, "number of keys %" PRIuSIZE ", average values per key %.1f",
|
||||
tbl->histogram.multival.count, record_count / (double)tbl->histogram.multival.count);
|
||||
line =
|
||||
chk_print(line, "number of keys %" PRIuSIZE ", average values per key ", tbl->histogram.multival.count);
|
||||
line = chk_print_ratio(line, record_count, tbl->histogram.multival.count, 1);
|
||||
}
|
||||
chk_line_end(line);
|
||||
}
|
||||
@@ -1302,16 +1360,20 @@ bailout:
|
||||
if (chk->cb->table_conclude)
|
||||
err = chk->cb->table_conclude(usr, tbl, cursor, err);
|
||||
MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution);
|
||||
line = chk_print(line, "summary: %" PRIuSIZE " records,", record_count);
|
||||
if (dups || (tbl->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP)))
|
||||
line = chk_print(line, " %" PRIuSIZE " dups,", dups);
|
||||
if (sub_databases || dbi == MAIN_DBI)
|
||||
line = chk_print(line, " %" PRIuSIZE " tables,", sub_databases);
|
||||
line = chk_print(line,
|
||||
" %" PRIuSIZE " key's bytes,"
|
||||
" %" PRIuSIZE " data's bytes,"
|
||||
" %" PRIuSIZE " problem(s)",
|
||||
tbl->histogram.key_len.amount, tbl->histogram.val_len.amount, scope->subtotal_issues);
|
||||
if (record_count | tbl->histogram.key_len.amount | tbl->histogram.val_len.amount | scope->subtotal_issues) {
|
||||
line = chk_print(line, "summary: %" PRIuSIZE " records,", record_count);
|
||||
if (dups || (tbl->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP)))
|
||||
line = chk_print(line, " %" PRIuSIZE " dups,", dups);
|
||||
if (sub_databases || dbi == MAIN_DBI)
|
||||
line = chk_print(line, " %" PRIuSIZE " tables,", sub_databases);
|
||||
line = chk_print(line,
|
||||
" %" PRIuSIZE " key's bytes,"
|
||||
" %" PRIuSIZE " data's bytes,"
|
||||
" %" PRIuSIZE " problem(s)",
|
||||
tbl->histogram.key_len.amount, tbl->histogram.val_len.amount, scope->subtotal_issues);
|
||||
} else {
|
||||
line = chk_puts(line, "empty");
|
||||
}
|
||||
chk_line_end(chk_flush(line));
|
||||
}
|
||||
|
||||
@@ -1405,7 +1467,7 @@ __cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, MDBX_chk_table_t
|
||||
iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) : pgno_sub(pgno, span));
|
||||
++span)
|
||||
;
|
||||
histogram_acc(span, &tbl->histogram.nested_tree);
|
||||
histogram_acc(span, &tbl->histogram.nested_height);
|
||||
MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra);
|
||||
if (line) {
|
||||
if (span > 1)
|
||||
@@ -1429,7 +1491,7 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) {
|
||||
MDBX_chk_context_t *const usr = chk->usr;
|
||||
MDBX_env *const env = usr->env;
|
||||
MDBX_txn *const txn = usr->txn;
|
||||
int err = env_info(env, txn, &chk->envinfo, sizeof(chk->envinfo), &chk->troika);
|
||||
int err = env_info(env, txn, &chk->envinfo, &chk->troika);
|
||||
if (unlikely(err))
|
||||
return chk_error_rc(scope, err, "env_info");
|
||||
|
||||
@@ -1449,9 +1511,11 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) {
|
||||
line = chk_puts(line, "is unavailable");
|
||||
chk_line_end(line);
|
||||
|
||||
err = osal_filesize(env->lazy_fd, &env->dxb_mmap.filesize);
|
||||
if (unlikely(err))
|
||||
return chk_error_rc(scope, err, "osal_filesize");
|
||||
line = chk_print_size(chk_line_begin(scope, MDBX_chk_verbose), "system unified page cache block ",
|
||||
chk->envinfo.mi_sys_upcblk, "");
|
||||
chk_line_end(line);
|
||||
|
||||
env->dxb_mmap.filesize = chk->envinfo.mi_dxb_fsize;
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
@@ -1503,11 +1567,13 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) {
|
||||
usr->result.backed_pages = (size_t)dxbfile_pages;
|
||||
}
|
||||
|
||||
line = chk_line_feed(chk_print(chk_line_begin(inner, MDBX_chk_info),
|
||||
"pagesize %u (%u system), max keysize %u..%u"
|
||||
", max readers %u",
|
||||
line = chk_line_feed(chk_print(chk_line_begin(inner, MDBX_chk_info), "pagesize %u (%u system), max keysize %u..%u",
|
||||
env->ps, globals.sys_pagesize, mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT),
|
||||
mdbx_env_get_maxkeysize_ex(env, MDBX_DB_DEFAULTS), env->max_readers));
|
||||
mdbx_env_get_maxkeysize_ex(env, MDBX_DB_DEFAULTS)));
|
||||
if ((env->flags & MDBX_EXCLUSIVE) == 0 && env->lck_mmap.lck) {
|
||||
line = chk_line_feed(chk_print(chk_line_begin(inner, MDBX_chk_info), "currently %u readers of %u maximum",
|
||||
atomic_load32(&env->lck_mmap.lck->rdt_length, mo_Relaxed), env->max_readers));
|
||||
}
|
||||
line = chk_line_feed(chk_print_size(line, "mapsize ", env->dxb_mmap.current, nullptr));
|
||||
if (txn->geo.lower == txn->geo.upper)
|
||||
line = chk_print_size(line, "fixed datafile: ", chk->envinfo.mi_geo.current, nullptr);
|
||||
@@ -1530,6 +1596,19 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) {
|
||||
chk_line_end(chk_print(line, " > until it will be closed or reopened in read-write mode."));
|
||||
}
|
||||
#endif /* Windows || Debug */
|
||||
line = chk_print_size(chk_line_begin(scope, MDBX_chk_verbose), "filesystem: io-block ", chk->envinfo.mi_sys_ioblk,
|
||||
", space allocated for the dxb-file ");
|
||||
if (chk->envinfo.mi_dxb_fallocated == chk->envinfo.mi_geo.current) {
|
||||
line = chk_puts(line, "exactly");
|
||||
} else {
|
||||
line = chk_print_size(
|
||||
line, (chk->envinfo.mi_dxb_fallocated > chk->envinfo.mi_geo.current) ? "with excess " : "partially ",
|
||||
chk->envinfo.mi_dxb_fallocated, " ");
|
||||
ratio2digits_buffer_t buffer;
|
||||
line =
|
||||
chk_print(line, "%s%%", ratio2percent(chk->envinfo.mi_dxb_fallocated, chk->envinfo.mi_geo.current, &buffer));
|
||||
}
|
||||
chk_line_end(line);
|
||||
chk_verbose_meta(inner, 0);
|
||||
chk_verbose_meta(inner, 1);
|
||||
chk_verbose_meta(inner, 2);
|
||||
@@ -1628,7 +1707,7 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) {
|
||||
err = chk_db(usr->scope, FREE_DBI, &chk->table_gc, chk_handle_gc);
|
||||
line = chk_line_begin(scope, MDBX_chk_info);
|
||||
if (line) {
|
||||
histogram_print(scope, line, &chk->table_gc.histogram.nested_tree, "span(s)", "single", false);
|
||||
histogram_print(scope, line, &chk->table_gc.histogram.nested_height, "span(s)", "single", false);
|
||||
chk_line_end(line);
|
||||
}
|
||||
if (usr->result.problems_gc == 0 && (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) {
|
||||
@@ -1646,69 +1725,50 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) {
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
err = chk_scope_begin(chk, 1, MDBX_chk_space, nullptr, nullptr, "Page allocation:");
|
||||
const double percent_boundary_reciprocal = 100.0 / txn->geo.upper;
|
||||
const double percent_backed_reciprocal = 100.0 / usr->result.backed_pages;
|
||||
const size_t backed = usr->result.backed_pages;
|
||||
const size_t boundary = txn->geo.upper;
|
||||
const size_t detained = usr->result.gc_pages - usr->result.reclaimable_pages;
|
||||
const size_t available2boundary = txn->geo.upper - usr->result.alloc_pages + usr->result.reclaimable_pages;
|
||||
const size_t available2backed = usr->result.backed_pages - usr->result.alloc_pages + usr->result.reclaimable_pages;
|
||||
const size_t remained2boundary = txn->geo.upper - usr->result.alloc_pages;
|
||||
const size_t remained2backed = usr->result.backed_pages - usr->result.alloc_pages;
|
||||
const size_t available2boundary = boundary - usr->result.alloc_pages + usr->result.reclaimable_pages;
|
||||
const size_t available2backed = backed - usr->result.alloc_pages + usr->result.reclaimable_pages;
|
||||
const size_t remained2boundary = boundary - usr->result.alloc_pages;
|
||||
const size_t remained2backed = backed - usr->result.alloc_pages;
|
||||
|
||||
const size_t used = (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) ? usr->result.alloc_pages - usr->result.gc_pages
|
||||
: usr->result.processed_pages;
|
||||
|
||||
line = chk_line_begin(usr->scope, MDBX_chk_info);
|
||||
line = chk_print(line,
|
||||
"backed by file: %" PRIuSIZE " pages (%.1f%%)"
|
||||
", %" PRIuSIZE " left to boundary (%.1f%%)",
|
||||
usr->result.backed_pages, usr->result.backed_pages * percent_boundary_reciprocal,
|
||||
txn->geo.upper - usr->result.backed_pages,
|
||||
(txn->geo.upper - usr->result.backed_pages) * percent_boundary_reciprocal);
|
||||
line = chk_print_pages_percent(line, "backed by file:\0 of boundary\0", backed, boundary);
|
||||
line = chk_print_pages_percent(line, ",\0\0 left to boundary", boundary - backed, boundary);
|
||||
line = chk_line_feed(line);
|
||||
|
||||
line = chk_print(line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", "used", used,
|
||||
used * percent_backed_reciprocal, used * percent_boundary_reciprocal);
|
||||
line = chk_print_pages_percent_bb(line, "used:", used, backed, boundary);
|
||||
line = chk_line_feed(line);
|
||||
|
||||
line = chk_print(line, "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE " to boundary (%.1f%% of boundary)",
|
||||
"remained", remained2backed, remained2backed * percent_backed_reciprocal, remained2boundary,
|
||||
remained2boundary * percent_boundary_reciprocal);
|
||||
line = chk_print_pages_percent(line, "remained:\0\0 of backed", remained2backed, backed);
|
||||
line = chk_print_pages_percent(line, ", left\0\0 to boundary", remained2boundary, boundary);
|
||||
line = chk_line_feed(line);
|
||||
|
||||
line =
|
||||
chk_print(line,
|
||||
"reclaimable: %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)"
|
||||
", GC %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)",
|
||||
usr->result.reclaimable_pages, usr->result.reclaimable_pages * percent_backed_reciprocal,
|
||||
usr->result.reclaimable_pages * percent_boundary_reciprocal, usr->result.gc_pages,
|
||||
usr->result.gc_pages * percent_backed_reciprocal, usr->result.gc_pages * percent_boundary_reciprocal);
|
||||
line = chk_print_pages_percent_bb(line, "reclaimable:", usr->result.reclaimable_pages, backed, boundary);
|
||||
line = chk_print_pages_percent_bb(line, ", within GC", usr->result.gc_pages, backed, boundary);
|
||||
line = chk_line_feed(line);
|
||||
|
||||
line = chk_print(line,
|
||||
"detained by reader(s): %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)"
|
||||
", %u reader(s), lag %" PRIi64,
|
||||
detained, detained * percent_backed_reciprocal, detained * percent_boundary_reciprocal,
|
||||
chk->envinfo.mi_numreaders, chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid);
|
||||
line = chk_print_pages_percent_bb(line, "detained by reader(s):", detained, backed, boundary);
|
||||
line = chk_print(line, ", %u reader(s), lag %" PRIi64, chk->envinfo.mi_numreaders,
|
||||
chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid);
|
||||
line = chk_line_feed(line);
|
||||
|
||||
line = chk_print(line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", "allocated",
|
||||
usr->result.alloc_pages, usr->result.alloc_pages * percent_backed_reciprocal,
|
||||
usr->result.alloc_pages * percent_boundary_reciprocal);
|
||||
line = chk_print_pages_percent_bb(line, "allocated:", usr->result.alloc_pages, backed, boundary);
|
||||
line = chk_line_feed(line);
|
||||
|
||||
line = chk_print(line, "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE " to boundary (%.1f%% of boundary)",
|
||||
"available", available2backed, available2backed * percent_backed_reciprocal, available2boundary,
|
||||
available2boundary * percent_boundary_reciprocal);
|
||||
line = chk_print_pages_percent(line, "available:\0 of backed\0", available2backed, backed);
|
||||
line = chk_print_pages_percent(line, ", left\0\0 to boundary", available2boundary, boundary);
|
||||
chk_line_end(line);
|
||||
|
||||
line = chk_line_begin(usr->scope, MDBX_chk_resolution);
|
||||
line = chk_print(line, "%s %" PRIaPGNO " pages", (txn->geo.upper == txn->geo.now) ? "total" : "upto", txn->geo.upper);
|
||||
line = chk_print(line, ", backed %" PRIuSIZE " (%.1f%%)", usr->result.backed_pages,
|
||||
usr->result.backed_pages * percent_boundary_reciprocal);
|
||||
line = chk_print(line, ", allocated %" PRIuSIZE " (%.1f%%)", usr->result.alloc_pages,
|
||||
usr->result.alloc_pages * percent_boundary_reciprocal);
|
||||
line = chk_print(line, ", available %" PRIuSIZE " (%.1f%%)", available2boundary,
|
||||
available2boundary * percent_boundary_reciprocal);
|
||||
line = chk_print(line, "%s %zu pages", (boundary == txn->geo.now) ? "total" : "upto", boundary);
|
||||
line = chk_print_pages_percent(line, ", backed\0\0", backed, boundary);
|
||||
line = chk_print_pages_percent(line, ", allocated\0\0", usr->result.alloc_pages, boundary);
|
||||
line = chk_print_pages_percent(line, ", available\0\0", available2boundary, boundary);
|
||||
chk_line_end(line);
|
||||
chk_scope_restore(scope, err);
|
||||
|
||||
|
||||
19
src/cogs.h
19
src/cogs.h
@@ -200,15 +200,21 @@ static inline bool check_table_flags(unsigned flags) {
|
||||
}
|
||||
}
|
||||
|
||||
static inline int tbl_setup_ifneed(const MDBX_env *env, volatile kvx_t *const kvx, const tree_t *const db) {
|
||||
MDBX_MAYBE_UNUSED static inline int tbl_setup_ifneed(const MDBX_env *env, volatile kvx_t *const kvx,
|
||||
const tree_t *const db) {
|
||||
return likely(kvx->clc.v.lmax) ? MDBX_SUCCESS : tbl_setup(env, kvx, db);
|
||||
}
|
||||
|
||||
MDBX_MAYBE_UNUSED static inline int tbl_refresh_absent2baddbi(MDBX_txn *txn, size_t dbi) {
|
||||
int rc = tbl_refresh(txn, dbi);
|
||||
return likely(rc != MDBX_NOTFOUND) ? rc : MDBX_BAD_DBI;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
MDBX_NOTHROW_PURE_FUNCTION static inline size_t pgno2bytes(const MDBX_env *env, size_t pgno) {
|
||||
eASSERT(env, (1u << env->ps2ln) == env->ps);
|
||||
return ((size_t)pgno) << env->ps2ln;
|
||||
return pgno << env->ps2ln;
|
||||
}
|
||||
|
||||
MDBX_NOTHROW_PURE_FUNCTION static inline page_t *pgno2page(const MDBX_env *env, size_t pgno) {
|
||||
@@ -286,7 +292,8 @@ MDBX_NOTHROW_PURE_FUNCTION static inline const page_t *payload2page(const void *
|
||||
return container_of(data, page_t, entries);
|
||||
}
|
||||
|
||||
MDBX_NOTHROW_PURE_FUNCTION static inline const page_t *ptr2page(const MDBX_env *env, const void *ptr) {
|
||||
MDBX_NOTHROW_PURE_FUNCTION MDBX_MAYBE_UNUSED static inline const page_t *ptr2page(const MDBX_env *env,
|
||||
const void *ptr) {
|
||||
eASSERT(env,
|
||||
ptr_dist(ptr, env->dxb_mmap.base) >= 0 && (size_t)ptr_dist(ptr, env->dxb_mmap.base) < env->dxb_mmap.limit);
|
||||
const uintptr_t mask = env->ps - 1;
|
||||
@@ -504,9 +511,9 @@ static inline int check_txn_rw(const MDBX_txn *txn, int bad_bits) {
|
||||
}
|
||||
|
||||
MDBX_NOTHROW_CONST_FUNCTION static inline txnid_t txn_basis_snapshot(const MDBX_txn *txn) {
|
||||
STATIC_ASSERT((MDBX_TXN_RDONLY >> 17) == 1);
|
||||
STATIC_ASSERT((xMDBX_TXNID_STEP >> (xMDBX_TXNID_STEP == 2)) == 1);
|
||||
const txnid_t committed_txnid = txn->txnid + (xMDBX_TXNID_STEP >> (xMDBX_TXNID_STEP == 2)) - ((txn->flags >> 17) & 1);
|
||||
STATIC_ASSERT(((MDBX_TXN_RDONLY >> ((xMDBX_TXNID_STEP == 2) ? 16 : 17)) & xMDBX_TXNID_STEP) == xMDBX_TXNID_STEP);
|
||||
const txnid_t committed_txnid =
|
||||
txn->txnid - xMDBX_TXNID_STEP + ((txn->flags >> ((xMDBX_TXNID_STEP == 2) ? 16 : 17)) & xMDBX_TXNID_STEP);
|
||||
tASSERT(txn, committed_txnid == ((txn->flags & MDBX_TXN_RDONLY) ? txn->txnid : txn->txnid - xMDBX_TXNID_STEP));
|
||||
return committed_txnid;
|
||||
}
|
||||
|
||||
@@ -63,6 +63,11 @@
|
||||
|
||||
#cmakedefine01 MDBX_USE_MINCORE
|
||||
|
||||
#cmakedefine MDBX_USE_FALLOCATE_AUTO
|
||||
/* MDBX_USE_FALLOCATE is emitted by the build system only when
 * MDBX_USE_FALLOCATE_AUTO has not been defined above. */
#ifndef MDBX_USE_FALLOCATE_AUTO
|
||||
#cmakedefine01 MDBX_USE_FALLOCATE
|
||||
#endif /* !MDBX_USE_FALLOCATE_AUTO */
|
||||
|
||||
/* Build Info */
|
||||
#ifndef MDBX_BUILD_TIMESTAMP
|
||||
#cmakedefine MDBX_BUILD_TIMESTAMP "@MDBX_BUILD_TIMESTAMP@"
|
||||
|
||||
@@ -293,7 +293,7 @@ static __always_inline int couple_init(cursor_couple_t *couple, const MDBX_txn *
|
||||
}
|
||||
|
||||
if (unlikely(*dbi_state & DBI_STALE))
|
||||
return tbl_fetch(couple->outer.txn, cursor_dbi(&couple->outer));
|
||||
return tbl_refresh_absent2baddbi(couple->outer.txn, cursor_dbi(&couple->outer));
|
||||
|
||||
return tbl_setup_ifneed(txn->env, kvx, tree);
|
||||
}
|
||||
|
||||
@@ -236,7 +236,7 @@ enum cursor_checking {
|
||||
MDBX_INTERNAL int __must_check_result cursor_validate(const MDBX_cursor *mc);
|
||||
|
||||
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline size_t cursor_dbi(const MDBX_cursor *mc) {
|
||||
cASSERT(mc, mc->txn && mc->txn->signature == txn_signature);
|
||||
cASSERT(mc, mc->txn->signature == txn_signature);
|
||||
size_t dbi = mc->dbi_state - mc->txn->dbi_state;
|
||||
cASSERT(mc, dbi < mc->txn->env->n_dbi);
|
||||
return dbi;
|
||||
|
||||
247
src/dbi.c
247
src/dbi.c
@@ -5,7 +5,7 @@
|
||||
|
||||
#if MDBX_ENABLE_DBI_SPARSE
|
||||
size_t dbi_bitmap_ctz_fallback(const MDBX_txn *txn, intptr_t bmi) {
|
||||
tASSERT(txn, bmi > 0);
|
||||
tASSERT(txn, bmi != 0);
|
||||
bmi &= -bmi;
|
||||
if (sizeof(txn->dbi_sparse[0]) > 4) {
|
||||
static const uint8_t debruijn_ctz64[64] = {0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
|
||||
@@ -33,6 +33,23 @@ struct dbi_snap_result dbi_snap(const MDBX_env *env, const size_t dbi) {
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Marks the handle `dbi` as DBI_OLDEN | DBI_LINDO in `txn` and then in each
 * of its parent transactions, walking up the `parent` chain.
 *
 * The walk stops early, without touching dbi_seqs, as soon as the state that
 * was just overwritten had DBI_FRESH or DBI_CREAT set. Otherwise, once the
 * transaction without a parent has been processed, its dbi_seqs[dbi] is
 * reset to zero.
 *
 * The `rc` argument is not interpreted; it is returned as-is on every path,
 * so a caller can write `return dbi_gone(txn, dbi, rc);`. */
int dbi_gone(MDBX_txn *txn, const size_t dbi, const int rc) {
  tASSERT(txn, txn->n_dbi > dbi && F_ISSET(txn->dbi_state[dbi], DBI_LINDO | DBI_VALID));

  MDBX_txn *cur = txn;
  for (;; cur = cur->parent) {
    /* remember the previous state before it is overwritten */
    const unsigned was = cur->dbi_state[dbi];
    cur->dbi_state[dbi] = DBI_OLDEN | DBI_LINDO;
    if ((was & (DBI_FRESH | DBI_CREAT)) != 0)
      return rc;
    if (cur->parent == nullptr)
      break;
  }

  /* TODO: FIXME (marker kept from the original code; it does not say what
   * exactly has to be fixed here) */
  cur->dbi_seqs[dbi] = 0;
  return rc;
}
|
||||
|
||||
__noinline int dbi_import(MDBX_txn *txn, const size_t dbi) {
|
||||
const MDBX_env *const env = txn->env;
|
||||
if (dbi >= env->n_dbi || !env->dbs_flags[dbi])
|
||||
@@ -266,8 +283,8 @@ int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, MDBX_cmp_func
|
||||
else {
|
||||
if (txn->dbi_state[dbi] & DBI_STALE) {
|
||||
eASSERT(env, env->dbs_flags[dbi] & DB_VALID);
|
||||
int err = tbl_fetch(txn, dbi);
|
||||
if (unlikely(err == MDBX_SUCCESS))
|
||||
int err = tbl_refresh(txn, dbi);
|
||||
if (unlikely(err != MDBX_NOTFOUND))
|
||||
return err;
|
||||
}
|
||||
eASSERT(env, ((env->dbs_flags[dbi] ^ txn->dbs[dbi].flags) & DB_PERSISTENT_FLAGS) == 0);
|
||||
@@ -325,8 +342,9 @@ static inline size_t dbi_namelen(const MDBX_val name) {
|
||||
return (name.iov_len > sizeof(defer_free_item_t)) ? name.iov_len : sizeof(defer_free_item_t);
|
||||
}
|
||||
|
||||
static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp,
|
||||
MDBX_cmp_func *datacmp, MDBX_val name) {
|
||||
static int dbi_open_locked(MDBX_txn *txn, cursor_couple_t *maindb_cx, unsigned user_flags, MDBX_cmp_func *keycmp,
|
||||
MDBX_cmp_func *datacmp, MDBX_val name, const size_t fastpath_slot) {
|
||||
int rc;
|
||||
MDBX_env *const env = txn->env;
|
||||
|
||||
/* Cannot mix named table(s) with DUPSORT flags */
|
||||
@@ -352,12 +370,12 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, MD
|
||||
env->kvs[MAIN_DBI].clc.v.cmp = builtin_datacmp(main_flags);
|
||||
txn->dbs[MAIN_DBI].flags = main_flags;
|
||||
txn->dbs[MAIN_DBI].dupfix_size = 0;
|
||||
int err = tbl_setup(env, &env->kvs[MAIN_DBI], &txn->dbs[MAIN_DBI]);
|
||||
if (unlikely(err != MDBX_SUCCESS)) {
|
||||
rc = tbl_setup(env, &env->kvs[MAIN_DBI], &txn->dbs[MAIN_DBI]);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
txn->dbi_state[MAIN_DBI] = DBI_LINDO;
|
||||
txn->flags |= MDBX_TXN_ERROR;
|
||||
env->flags |= ENV_FATAL_ERROR;
|
||||
return err;
|
||||
return rc;
|
||||
}
|
||||
env->dbs_flags[MAIN_DBI] = main_flags | DB_VALID;
|
||||
txn->dbi_seqs[MAIN_DBI] = atomic_store32(&env->dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease);
|
||||
@@ -368,6 +386,7 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, MD
|
||||
tASSERT(txn, env->kvs[MAIN_DBI].clc.k.cmp);
|
||||
|
||||
/* Is the DB already open? */
|
||||
defer_free_item_t *clone = nullptr;
|
||||
size_t slot = env->n_dbi;
|
||||
for (size_t scan = CORE_DBS; scan < env->n_dbi; ++scan) {
|
||||
if ((env->dbs_flags[scan] & DB_VALID) == 0) {
|
||||
@@ -377,21 +396,49 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, MD
|
||||
}
|
||||
if (env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[scan].name) == 0) {
|
||||
slot = scan;
|
||||
int err = dbi_check(txn, slot);
|
||||
if (err == MDBX_BAD_DBI && txn->dbi_state[slot] == (DBI_OLDEN | DBI_LINDO)) {
|
||||
rc = dbi_check(txn, slot);
|
||||
if (rc == MDBX_BAD_DBI && txn->dbi_state[slot] == (DBI_OLDEN | DBI_LINDO)) {
|
||||
/* хендл использовался, стал невалидным,
|
||||
* но теперь явно пере-открывается в этой транзакци */
|
||||
* но теперь явно пере-открывается в этой транзакции */
|
||||
eASSERT(env, !txn->cursors[slot]);
|
||||
txn->dbi_state[slot] = DBI_LINDO;
|
||||
err = dbi_check(txn, slot);
|
||||
txn->dbi_seqs[slot] = 0;
|
||||
rc = dbi_import(txn, slot);
|
||||
/* TODO: FIXME */
|
||||
}
|
||||
if (err == MDBX_SUCCESS) {
|
||||
err = dbi_bind(txn, slot, user_flags, keycmp, datacmp);
|
||||
if (likely(err == MDBX_SUCCESS)) {
|
||||
goto done;
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
|
||||
rc = dbi_bind(txn, slot, user_flags, keycmp, datacmp);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
|
||||
if (unlikely((txn->dbi_state[slot] & DBI_STALE) == 0))
|
||||
goto done;
|
||||
|
||||
if (fastpath_slot /* уже был выполнен поиск посредством tbl_fetch() */) {
|
||||
if (slot != fastpath_slot)
|
||||
txn->dbs[slot] = txn->dbs[fastpath_slot];
|
||||
if (user_flags & MDBX_CREATE) {
|
||||
/* значит таблица уже была открытой, но проверка её наличия в fastpath вернула MDBX_NOTFOUND */
|
||||
rc = MDBX_NOTFOUND;
|
||||
} else {
|
||||
/* значит в fastpath был найден пустой слот и проверка наличия таблицы завершилась успешно */
|
||||
assert(rc == MDBX_SUCCESS);
|
||||
}
|
||||
} else {
|
||||
rc = tbl_fetch(txn, &maindb_cx->outer, slot, &name, user_flags);
|
||||
}
|
||||
return err;
|
||||
|
||||
if (likely(rc == MDBX_SUCCESS))
|
||||
goto done;
|
||||
|
||||
if (rc == MDBX_NOTFOUND && (user_flags & MDBX_CREATE)) {
|
||||
name = env->kvs[scan].name;
|
||||
goto create;
|
||||
}
|
||||
|
||||
return dbi_gone(txn, slot, rc);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -409,88 +456,72 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, MD
|
||||
env->n_dbi = (unsigned)slot + 1;
|
||||
eASSERT(env, slot < env->n_dbi);
|
||||
|
||||
int err = dbi_check(txn, slot);
|
||||
eASSERT(env, err == MDBX_BAD_DBI);
|
||||
if (unlikely(err != MDBX_BAD_DBI))
|
||||
rc = dbi_check(txn, slot);
|
||||
eASSERT(env, rc == MDBX_BAD_DBI);
|
||||
if (unlikely(rc != MDBX_BAD_DBI))
|
||||
return MDBX_PROBLEM;
|
||||
|
||||
/* Find the DB info */
|
||||
MDBX_val body;
|
||||
cursor_couple_t cx;
|
||||
int rc = cursor_init(&cx.outer, txn, MAIN_DBI);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
rc = cursor_seek(&cx.outer, &name, &body, MDBX_SET).err;
|
||||
rc = tbl_fetch(txn, &maindb_cx->outer, slot, &name, user_flags);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
if (rc != MDBX_NOTFOUND || !(user_flags & MDBX_CREATE))
|
||||
return rc;
|
||||
} else {
|
||||
/* make sure this is actually a table */
|
||||
node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]);
|
||||
if (unlikely((node_flags(node) & (N_DUP | N_TREE)) != N_TREE))
|
||||
return MDBX_INCOMPATIBLE;
|
||||
if (!MDBX_DISABLE_VALIDATION && unlikely(body.iov_len != sizeof(tree_t))) {
|
||||
ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid table node size", body.iov_len);
|
||||
return MDBX_CORRUPTED;
|
||||
}
|
||||
memcpy(&txn->dbs[slot], body.iov_base, sizeof(tree_t));
|
||||
}
|
||||
|
||||
/* Done here so we cannot fail after creating a new DB */
|
||||
defer_free_item_t *const clone = osal_malloc(dbi_namelen(name));
|
||||
clone = osal_malloc(dbi_namelen(name));
|
||||
if (unlikely(!clone))
|
||||
return MDBX_ENOMEM;
|
||||
memcpy(clone, name.iov_base, name.iov_len);
|
||||
name.iov_base = clone;
|
||||
|
||||
create:
|
||||
tASSERT(txn, rc == MDBX_SUCCESS || rc == MDBX_NOTFOUND);
|
||||
uint8_t dbi_state = DBI_LINDO | DBI_VALID | DBI_FRESH;
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
/* MDBX_NOTFOUND and MDBX_CREATE: Create new DB */
|
||||
tASSERT(txn, rc == MDBX_NOTFOUND);
|
||||
body.iov_base = memset(&txn->dbs[slot], 0, body.iov_len = sizeof(tree_t));
|
||||
txn->dbs[slot].root = P_INVALID;
|
||||
txn->dbs[slot].mod_txnid = txn->txnid;
|
||||
txn->dbs[slot].flags = user_flags & DB_PERSISTENT_FLAGS;
|
||||
cx.outer.next = txn->cursors[MAIN_DBI];
|
||||
txn->cursors[MAIN_DBI] = &cx.outer;
|
||||
rc = cursor_put_checklen(&cx.outer, &name, &body, N_TREE | MDBX_NOOVERWRITE);
|
||||
txn->cursors[MAIN_DBI] = cx.outer.next;
|
||||
rc = tbl_create(txn, &maindb_cx->outer, slot, &name, user_flags);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
|
||||
dbi_state |= DBI_DIRTY | DBI_CREAT;
|
||||
txn->flags |= MDBX_TXN_DIRTY;
|
||||
tASSERT(txn, (txn->dbi_state[MAIN_DBI] & DBI_DIRTY) != 0);
|
||||
}
|
||||
|
||||
/* Got info, register DBI in this txn */
|
||||
const uint32_t seq = dbi_seq_next(env, slot);
|
||||
eASSERT(env, env->dbs_flags[slot] == DB_POISON && !txn->cursors[slot] &&
|
||||
(txn->dbi_state[slot] & (DBI_LINDO | DBI_VALID)) == DBI_LINDO);
|
||||
txn->dbi_state[slot] = dbi_state;
|
||||
memcpy(&txn->dbs[slot], body.iov_base, sizeof(txn->dbs[slot]));
|
||||
env->dbs_flags[slot] = txn->dbs[slot].flags;
|
||||
rc = dbi_bind(txn, slot, user_flags, keycmp, datacmp);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
eASSERT(env, !txn->cursors[slot]);
|
||||
if (clone) {
|
||||
eASSERT(env, env->dbs_flags[slot] == DB_POISON && (txn->dbi_state[slot] & (DBI_LINDO | DBI_VALID)) == DBI_LINDO);
|
||||
txn->dbi_state[slot] = dbi_state;
|
||||
env->dbs_flags[slot] = txn->dbs[slot].flags;
|
||||
rc = dbi_bind(txn, slot, user_flags, keycmp, datacmp);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
|
||||
env->kvs[slot].name = name;
|
||||
env->dbs_flags[slot] = txn->dbs[slot].flags | DB_VALID;
|
||||
txn->dbi_seqs[slot] = atomic_store32(&env->dbi_seqs[slot], seq, mo_AcquireRelease);
|
||||
env->kvs[slot].name = name;
|
||||
env->dbs_flags[slot] = txn->dbs[slot].flags | DB_VALID;
|
||||
txn->dbi_seqs[slot] = atomic_store32(&env->dbi_seqs[slot], seq, mo_AcquireRelease);
|
||||
} else {
|
||||
eASSERT(env, env->dbs_flags[slot] == (DB_VALID | (user_flags & DB_PERSISTENT_FLAGS)) &&
|
||||
env->dbs_flags[slot] == (DB_VALID | txn->dbs[slot].flags) &&
|
||||
txn->dbi_state[slot] == (DBI_LINDO | DBI_VALID | DBI_STALE));
|
||||
}
|
||||
|
||||
done:
|
||||
*dbi = (MDBX_dbi)slot;
|
||||
*(MDBX_dbi *)maindb_cx->userctx = (MDBX_dbi)slot;
|
||||
tASSERT(txn, slot < txn->n_dbi && (env->dbs_flags[slot] & DB_VALID) != 0);
|
||||
eASSERT(env, dbi_check(txn, slot) == MDBX_SUCCESS);
|
||||
return MDBX_SUCCESS;
|
||||
|
||||
bailout:
|
||||
eASSERT(env, !txn->cursors[slot] && !env->kvs[slot].name.iov_len && !env->kvs[slot].name.iov_base);
|
||||
txn->dbi_state[slot] &= DBI_LINDO | DBI_OLDEN;
|
||||
env->dbs_flags[slot] = 0;
|
||||
osal_free(clone);
|
||||
if (slot + 1 == env->n_dbi)
|
||||
txn->n_dbi = env->n_dbi = (unsigned)slot;
|
||||
if (clone) {
|
||||
eASSERT(env, !txn->cursors[slot] && !env->kvs[slot].name.iov_len && !env->kvs[slot].name.iov_base);
|
||||
osal_free(clone);
|
||||
if (slot + 1 == env->n_dbi)
|
||||
txn->n_dbi = env->n_dbi = (unsigned)slot;
|
||||
} else {
|
||||
eASSERT(env, name.iov_base == env->kvs[slot].name.iov_base);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -528,18 +559,20 @@ int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned user_flags, MDB
|
||||
if (unlikely(name->iov_len > txn->env->leaf_nodemax - NODESIZE - sizeof(tree_t)))
|
||||
return MDBX_EINVAL;
|
||||
|
||||
cursor_couple_t cx;
|
||||
size_t fastpath_slot = 0;
|
||||
#if MDBX_ENABLE_DBI_LOCKFREE
|
||||
/* Is the DB already open? */
|
||||
const MDBX_env *const env = txn->env;
|
||||
bool have_free_slot = env->n_dbi < env->max_dbi;
|
||||
for (size_t i = CORE_DBS; i < env->n_dbi; ++i) {
|
||||
if ((env->dbs_flags[i] & DB_VALID) == 0) {
|
||||
have_free_slot = true;
|
||||
size_t first_free_slot = env->n_dbi;
|
||||
for (size_t slot = CORE_DBS; slot < env->n_dbi; ++slot) {
|
||||
if ((env->dbs_flags[slot] & DB_VALID) == 0) {
|
||||
first_free_slot = (first_free_slot < slot) ? first_free_slot : slot;
|
||||
continue;
|
||||
}
|
||||
|
||||
struct dbi_snap_result snap = dbi_snap(env, i);
|
||||
const MDBX_val snap_name = env->kvs[i].name;
|
||||
struct dbi_snap_result snap = dbi_snap(env, slot);
|
||||
const MDBX_val snap_name = env->kvs[slot].name;
|
||||
const uint32_t main_seq = atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease);
|
||||
MDBX_cmp_func *const snap_cmp = env->kvs[MAIN_DBI].clc.k.cmp;
|
||||
if (unlikely(!(snap.flags & DB_VALID) || !snap_name.iov_base || !snap_name.iov_len || !snap_cmp))
|
||||
@@ -547,10 +580,10 @@ int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned user_flags, MDB
|
||||
goto slowpath_locking;
|
||||
|
||||
const bool name_match = snap_cmp(&snap_name, name) == 0;
|
||||
if (unlikely(snap.sequence != atomic_load32(&env->dbi_seqs[i], mo_AcquireRelease) ||
|
||||
if (unlikely(snap.sequence != atomic_load32(&env->dbi_seqs[slot], mo_AcquireRelease) ||
|
||||
main_seq != atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease) ||
|
||||
snap.flags != env->dbs_flags[i] || snap_name.iov_base != env->kvs[i].name.iov_base ||
|
||||
snap_name.iov_len != env->kvs[i].name.iov_len))
|
||||
snap.flags != env->dbs_flags[slot] || snap_name.iov_base != env->kvs[slot].name.iov_base ||
|
||||
snap_name.iov_len != env->kvs[slot].name.iov_len))
|
||||
/* похоже на столкновение с параллельно работающим обновлением */
|
||||
goto slowpath_locking;
|
||||
|
||||
@@ -559,45 +592,65 @@ int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned user_flags, MDB
|
||||
|
||||
osal_flush_incoherent_cpu_writeback();
|
||||
if (user_flags != MDBX_ACCEDE &&
|
||||
(((user_flags ^ snap.flags) & DB_PERSISTENT_FLAGS) || (keycmp && keycmp != env->kvs[i].clc.k.cmp) ||
|
||||
(datacmp && datacmp != env->kvs[i].clc.v.cmp)))
|
||||
(((user_flags ^ snap.flags) & DB_PERSISTENT_FLAGS) || (keycmp && keycmp != env->kvs[slot].clc.k.cmp) ||
|
||||
(datacmp && datacmp != env->kvs[slot].clc.v.cmp)))
|
||||
/* есть подозрение что пользователь открывает таблицу с другими флагами/атрибутами
|
||||
* или другими компараторами, поэтому уходим в безопасный режим */
|
||||
goto slowpath_locking;
|
||||
|
||||
rc = dbi_check(txn, i);
|
||||
if (rc == MDBX_BAD_DBI && txn->dbi_state[i] == (DBI_OLDEN | DBI_LINDO)) {
|
||||
rc = dbi_check(txn, slot);
|
||||
if (rc == MDBX_BAD_DBI && txn->dbi_state[slot] == (DBI_OLDEN | DBI_LINDO)) {
|
||||
/* хендл использовался, стал невалидным,
|
||||
* но теперь явно пере-открывается в этой транзакци */
|
||||
eASSERT(env, !txn->cursors[i]);
|
||||
txn->dbi_state[i] = DBI_LINDO;
|
||||
rc = dbi_check(txn, i);
|
||||
* но теперь явно пере-открывается в этой транзакции */
|
||||
goto slowpath_locking;
|
||||
}
|
||||
if (likely(rc == MDBX_SUCCESS)) {
|
||||
if (unlikely(snap.sequence != atomic_load32(&env->dbi_seqs[i], mo_AcquireRelease) ||
|
||||
main_seq != atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease) ||
|
||||
snap.flags != env->dbs_flags[i] || snap_name.iov_base != env->kvs[i].name.iov_base ||
|
||||
snap_name.iov_len != env->kvs[i].name.iov_len))
|
||||
/* похоже на столкновение с параллельно работающим обновлением */
|
||||
goto slowpath_locking;
|
||||
rc = dbi_bind(txn, i, user_flags, keycmp, datacmp);
|
||||
if (likely(rc == MDBX_SUCCESS))
|
||||
*dbi = (MDBX_dbi)i;
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
|
||||
if (unlikely(snap.sequence != atomic_load32(&env->dbi_seqs[slot], mo_AcquireRelease) ||
|
||||
main_seq != atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease) ||
|
||||
snap.flags != env->dbs_flags[slot] || snap_name.iov_base != env->kvs[slot].name.iov_base ||
|
||||
snap_name.iov_len != env->kvs[slot].name.iov_len))
|
||||
/* похоже на столкновение с параллельно работающим обновлением */
|
||||
goto slowpath_locking;
|
||||
|
||||
rc = dbi_bind(txn, slot, user_flags, keycmp, datacmp);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
|
||||
tASSERT(txn, F_ISSET(txn->dbi_state[slot], DBI_LINDO | DBI_VALID));
|
||||
if (txn->dbi_state[slot] & DBI_STALE) {
|
||||
rc = tbl_fetch(txn, &cx.outer, fastpath_slot = slot, name, user_flags);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
if (rc == MDBX_NOTFOUND && (user_flags & MDBX_CREATE))
|
||||
/* таблицы уже нет, но запрошено её пересоздание */
|
||||
goto slowpath_locking;
|
||||
|
||||
return dbi_gone(txn, slot, rc);
|
||||
}
|
||||
txn->dbi_state[slot] -= DBI_STALE;
|
||||
}
|
||||
return rc;
|
||||
*dbi = (MDBX_dbi)slot;
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
/* Fail, if no free slot and max hit */
|
||||
if (unlikely(!have_free_slot))
|
||||
if (unlikely(first_free_slot >= env->max_dbi))
|
||||
return MDBX_DBS_FULL;
|
||||
|
||||
slowpath_locking:
|
||||
if (!(user_flags & MDBX_CREATE)) {
|
||||
rc = tbl_fetch(txn, &cx.outer, fastpath_slot = first_free_slot, name, user_flags);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
}
|
||||
|
||||
slowpath_locking:
|
||||
#endif /* MDBX_ENABLE_DBI_LOCKFREE */
|
||||
|
||||
cx.userctx = dbi;
|
||||
rc = osal_fastmutex_acquire(&txn->env->dbi_lock);
|
||||
if (likely(rc == MDBX_SUCCESS)) {
|
||||
rc = dbi_open_locked(txn, user_flags, dbi, keycmp, datacmp, *name);
|
||||
rc = dbi_open_locked(txn, &cx, user_flags, keycmp, datacmp, *name, fastpath_slot);
|
||||
ENSURE(txn->env, osal_fastmutex_release(&txn->env->dbi_lock) == MDBX_SUCCESS);
|
||||
}
|
||||
return rc;
|
||||
|
||||
@@ -11,7 +11,7 @@ MDBX_NOTHROW_CONST_FUNCTION MDBX_MAYBE_UNUSED MDBX_INTERNAL size_t dbi_bitmap_ct
|
||||
intptr_t bmi);
|
||||
|
||||
static inline size_t dbi_bitmap_ctz(const MDBX_txn *txn, intptr_t bmi) {
|
||||
tASSERT(txn, bmi > 0);
|
||||
tASSERT(txn, bmi != 0);
|
||||
STATIC_ASSERT(sizeof(bmi) >= sizeof(txn->dbi_sparse[0]));
|
||||
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctzl)
|
||||
if (sizeof(txn->dbi_sparse[0]) <= sizeof(int))
|
||||
@@ -80,6 +80,7 @@ static inline bool dbi_foreach_step(const MDBX_txn *const txn, size_t *bitmap_it
|
||||
#define TXN_FOREACH_DBI_USER(TXN, I) TXN_FOREACH_DBI_FROM(TXN, I, CORE_DBS)
|
||||
|
||||
MDBX_INTERNAL int dbi_import(MDBX_txn *txn, const size_t dbi);
|
||||
MDBX_INTERNAL int dbi_gone(MDBX_txn *txn, const size_t dbi, const int rc);
|
||||
|
||||
struct dbi_snap_result {
|
||||
uint32_t sequence;
|
||||
|
||||
12
src/dxb.c
12
src/dxb.c
@@ -532,7 +532,7 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bit
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
|
||||
err = osal_fallocate(env->lazy_fd, env->dxb_mmap.filesize = env->dxb_mmap.current = env->geo_in_bytes.now);
|
||||
err = osal_fsetsize(env->lazy_fd, env->dxb_mmap.filesize = env->dxb_mmap.current = env->geo_in_bytes.now);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
|
||||
@@ -570,7 +570,7 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bit
|
||||
|
||||
size_t expected_filesize = 0;
|
||||
const size_t used_bytes = pgno2bytes(env, header.geometry.first_unallocated);
|
||||
const size_t used_aligned2os_bytes = ceil_powerof2(used_bytes, globals.sys_pagesize);
|
||||
const size_t used_aligned2os_bytes = ceil_powerof2(used_bytes, globals.sys_allocation_granularity);
|
||||
if ((env->flags & MDBX_RDONLY) /* readonly */
|
||||
|| lck_rc != MDBX_RESULT_TRUE /* not exclusive */
|
||||
|| /* recovery mode */ env->stuck_meta >= 0) {
|
||||
@@ -639,7 +639,7 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bit
|
||||
env->geo_in_bytes.shrink = pgno_ceil2sp_bytes(env, pv2pages(header.geometry.shrink_pv));
|
||||
}
|
||||
|
||||
ENSURE(env, pgno_ceil2sp_bytes(env, header.geometry.now) == env->geo_in_bytes.now);
|
||||
ENSURE(env, pgno_ceil2ag_bytes(env, header.geometry.now) == env->geo_in_bytes.now);
|
||||
ENSURE(env, env->geo_in_bytes.now >= used_bytes);
|
||||
if (!expected_filesize)
|
||||
expected_filesize = env->geo_in_bytes.now;
|
||||
@@ -843,13 +843,13 @@ __cold int dxb_setup(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bit
|
||||
if (lck_rc == /* lck exclusive */ MDBX_RESULT_TRUE) {
|
||||
//-------------------------------------------------- shrink DB & update geo
|
||||
/* re-check size after mmap */
|
||||
if ((env->dxb_mmap.current & (globals.sys_pagesize - 1)) != 0 || env->dxb_mmap.current < used_bytes) {
|
||||
if (floor_powerof2(env->dxb_mmap.current, globals.sys_pagesize) < used_bytes) {
|
||||
ERROR("unacceptable/unexpected datafile size %" PRIuPTR, env->dxb_mmap.current);
|
||||
return MDBX_PROBLEM;
|
||||
}
|
||||
if (env->dxb_mmap.current != env->geo_in_bytes.now) {
|
||||
header.geometry.now = bytes2pgno(env, env->dxb_mmap.current);
|
||||
NOTICE("need update meta-geo to filesize %" PRIuPTR " bytes, %" PRIaPGNO " pages", env->dxb_mmap.current,
|
||||
header.geometry.now = bytes2pgno(env, env->geo_in_bytes.now);
|
||||
NOTICE("need update meta-geo to filesize %" PRIuPTR " bytes, aligned %" PRIaPGNO " pages", env->geo_in_bytes.now,
|
||||
header.geometry.now);
|
||||
}
|
||||
|
||||
|
||||
13
src/env.c
13
src/env.c
@@ -305,6 +305,12 @@ __cold int env_open(MDBX_env *env, mdbx_mode_t mode) {
|
||||
|
||||
env->fd4meta = env->lazy_fd;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
env->dxb_lock_event = CreateEventW(nullptr, true, false, nullptr);
|
||||
if (unlikely(!env->dxb_lock_event))
|
||||
return (int)GetLastError();
|
||||
env->lck_lock_event = CreateEventW(nullptr, true, false, nullptr);
|
||||
if (unlikely(!env->lck_lock_event))
|
||||
return (int)GetLastError();
|
||||
eASSERT(env, env->ioring.overlapped_fd == 0);
|
||||
bool ior_direct = false;
|
||||
if (!(env->flags & (MDBX_RDONLY | MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_EXCLUSIVE))) {
|
||||
@@ -346,9 +352,6 @@ __cold int env_open(MDBX_env *env, mdbx_mode_t mode) {
|
||||
&env->ioring.overlapped_fd, 0);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
env->dxb_lock_event = CreateEventW(nullptr, true, false, nullptr);
|
||||
if (unlikely(!env->dxb_lock_event))
|
||||
return (int)GetLastError();
|
||||
osal_fseek(env->ioring.overlapped_fd, safe_parking_lot_offset);
|
||||
}
|
||||
#else
|
||||
@@ -545,6 +548,10 @@ __cold int env_close(MDBX_env *env, bool resurrect_after_fork) {
|
||||
CloseHandle(env->dxb_lock_event);
|
||||
env->dxb_lock_event = INVALID_HANDLE_VALUE;
|
||||
}
|
||||
if (env->lck_lock_event != INVALID_HANDLE_VALUE) {
|
||||
CloseHandle(env->lck_lock_event);
|
||||
env->lck_lock_event = INVALID_HANDLE_VALUE;
|
||||
}
|
||||
eASSERT(env, !resurrect_after_fork);
|
||||
if (env->pathname_char) {
|
||||
osal_free(env->pathname_char);
|
||||
|
||||
@@ -65,6 +65,9 @@ union logger_union {
|
||||
struct libmdbx_globals {
|
||||
bin128_t bootid;
|
||||
unsigned sys_pagesize, sys_allocation_granularity;
|
||||
#ifdef AT_UCACHEBSIZE
|
||||
unsigned sys_unified_cache_block;
|
||||
#endif /* AT_UCACHEBSIZE */
|
||||
uint8_t sys_pagesize_ln2;
|
||||
uint8_t runtime_flags;
|
||||
uint8_t loglevel;
|
||||
|
||||
@@ -878,7 +878,7 @@ pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags)
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
if (unlikely(!is_reclaimable(txn, mc, flags))) {
|
||||
eASSERT(env, (txn->flags & txn_gc_drained) || num > 1);
|
||||
eASSERT(env, (txn->flags & txn_gc_drained) || num > 1 || mc->tree == &txn->dbs[FREE_DBI]);
|
||||
goto no_gc;
|
||||
}
|
||||
|
||||
@@ -1090,7 +1090,7 @@ next_gc:
|
||||
rkl_t *rkl = &txn->wr.gc.reclaimed;
|
||||
const char *rkl_name = "reclaimed";
|
||||
if (mc->dbi_state != txn->dbi_state &&
|
||||
(MDBX_DEBUG || pnl_size(txn->wr.repnl) > (size_t)gc->tree->height + gc->tree->height + 3)) {
|
||||
(MDBX_DEBUG > 0 || pnl_size(txn->wr.repnl) > (size_t)gc->tree->height + gc->tree->height + 3)) {
|
||||
gc->next = txn->cursors[FREE_DBI];
|
||||
txn->cursors[FREE_DBI] = gc;
|
||||
ret.err = cursor_del(gc, 0);
|
||||
|
||||
@@ -1290,9 +1290,12 @@ static int gc_fill_returned(MDBX_txn *txn, gcu_t *ctx) {
|
||||
rkl_iter_t iter = rkl_iterator(&txn->wr.gc.comeback, is_lifo(txn));
|
||||
size_t surplus = ctx->return_reserved_hi - amount, stored = 0;
|
||||
const uint64_t factor = ((uint64_t)surplus << 32) / ctx->return_reserved_hi;
|
||||
TRACE("%s: amount %zu, slots %zu, surplus %zu (%zu..%zu), factor %.6f (%" PRIu64 " >> 32, sharp %.12f)",
|
||||
dbg_prefix(ctx), amount, slots, surplus, ctx->return_reserved_lo, ctx->return_reserved_hi,
|
||||
factor / (double)UINT32_MAX, factor, surplus / (double)ctx->return_reserved_hi);
|
||||
ratio2digits_buffer_t factor_rough, factor_sharp;
|
||||
TRACE("%s: amount %zu, slots %zu, surplus %zu (%zu..%zu), factor %s (%" PRIu64 " >> 32, sharp %s)", dbg_prefix(ctx),
|
||||
amount, slots, surplus, ctx->return_reserved_lo, ctx->return_reserved_hi,
|
||||
ratio2digits(factor, UINT32_MAX, &factor_rough, 6), factor,
|
||||
ratio2digits(surplus, ctx->return_reserved_hi, &factor_sharp, 12));
|
||||
|
||||
do {
|
||||
const size_t left = amount - stored;
|
||||
tASSERT(txn, left > 0 && left <= amount);
|
||||
|
||||
27
src/global.c
27
src/global.c
@@ -27,8 +27,7 @@ BOOL APIENTRY DllMain(HANDLE module, DWORD reason, LPVOID reserved)
|
||||
#if !MDBX_MANUAL_MODULE_HANDLER
|
||||
static
|
||||
#endif /* !MDBX_MANUAL_MODULE_HANDLER */
|
||||
void NTAPI
|
||||
mdbx_module_handler(PVOID module, DWORD reason, PVOID reserved)
|
||||
void NTAPI mdbx_module_handler(PVOID module, DWORD reason, PVOID reserved)
|
||||
#endif /* MDBX_BUILD_SHARED_LIBRARY */
|
||||
{
|
||||
(void)reserved;
|
||||
@@ -176,8 +175,29 @@ __cold static __attribute__((__destructor__)) void mdbx_global_destructor(void)
|
||||
|
||||
struct libmdbx_globals globals;
|
||||
|
||||
/* Reads the variable `name` through osal_getenv() and interprets it as a
 * boolean switch:
 *  - not set                              -> default_value;
 *  - set to an empty string               -> true (implied ON);
 *  - "yes" / "on" / "true" / "1"          -> true;
 *  - "no" / "off" / "false" / "0"         -> false;
 *  - anything else                        -> default_value.
 * Words are compared with strcasecmp(), i.e. ignoring letter case. */
static bool getenv_bool(const char *name, bool default_value) {
  static const char *const words_on[] = {"yes", "on", "true", "1"};
  static const char *const words_off[] = {"no", "off", "false", "0"};

  const char *const text = osal_getenv(name, false);
  if (!text)
    return default_value;
  if (text[0] == '\0')
    return true; /* implied ON */

  for (size_t i = 0; i < sizeof(words_on) / sizeof(words_on[0]); ++i)
    if (strcasecmp(text, words_on[i]) == 0)
      return true;

  for (size_t i = 0; i < sizeof(words_off) / sizeof(words_off[0]); ++i)
    if (strcasecmp(text, words_off[i]) == 0)
      return false;

  /* unrecognized value: behave as if the variable was not set */
  return default_value;
}
|
||||
|
||||
__cold static void mdbx_init(void) {
|
||||
globals.runtime_flags = ((MDBX_DEBUG) > 0) * MDBX_DBG_ASSERT + ((MDBX_DEBUG) > 1) * MDBX_DBG_AUDIT;
|
||||
globals.runtime_flags = (getenv_bool("MDBX_DBG_ASSERT", (MDBX_DEBUG) > 0) ? MDBX_DBG_ASSERT : 0) |
|
||||
(getenv_bool("MDBX_DBG_AUDIT", (MDBX_DEBUG) > 1) ? MDBX_DBG_AUDIT : 0) |
|
||||
(getenv_bool("MDBX_DBG_JITTER", false) ? MDBX_DBG_JITTER : 0) |
|
||||
(getenv_bool("MDBX_DBG_DUMP", false) ? MDBX_DBG_DUMP : 0) |
|
||||
(getenv_bool("MDBX_DBG_LEGACY_MULTIOPEN", false) ? MDBX_DBG_LEGACY_MULTIOPEN : 0) |
|
||||
(getenv_bool("MDBX_DBG_LEGACY_OVERLAP", false) ? MDBX_DBG_LEGACY_OVERLAP : 0) |
|
||||
(getenv_bool("MDBX_DBG_DONT_UPGRADE", false) ? MDBX_DBG_DONT_UPGRADE : 0);
|
||||
globals.loglevel = MDBX_LOG_FATAL;
|
||||
ENSURE(nullptr, osal_fastmutex_init(&globals.debug_lock) == 0);
|
||||
osal_ctor();
|
||||
@@ -379,6 +399,7 @@ __dll_export
|
||||
#else /* Windows */
|
||||
" MDBX_LOCKING=" MDBX_LOCKING_CONFIG
|
||||
" MDBX_USE_OFDLOCKS=" MDBX_USE_OFDLOCKS_CONFIG
|
||||
" MDBX_USE_FALLOCATE=" MDBX_USE_FALLOCATE_CONFIG
|
||||
#endif /* !Windows */
|
||||
" MDBX_CACHELINE_SIZE=" MDBX_STRINGIFY(MDBX_CACHELINE_SIZE)
|
||||
" MDBX_CPU_WRITEBACK_INCOHERENT=" MDBX_STRINGIFY(MDBX_CPU_WRITEBACK_INCOHERENT)
|
||||
|
||||
@@ -142,10 +142,10 @@ struct kvx {
|
||||
|
||||
/* Non-shared DBI state flags inside transaction */
|
||||
enum dbi_state {
|
||||
DBI_DIRTY = 0x01 /* DB was written in this txn */,
|
||||
DBI_STALE = 0x02 /* Named-DB record is older than txnID */,
|
||||
DBI_FRESH = 0x04 /* Named-DB handle opened in this txn */,
|
||||
DBI_CREAT = 0x08 /* Named-DB handle created in this txn */,
|
||||
DBI_DIRTY = 0x01 /* table was written in this txn */,
|
||||
DBI_STALE = 0x02 /* cached table record is outdated and should be reloaded/refreshed */,
|
||||
DBI_FRESH = 0x04 /* table handle opened in this txn */,
|
||||
DBI_CREAT = 0x08 /* table handle created in this txn */,
|
||||
DBI_VALID = 0x10 /* Handle is valid, see also DB_VALID */,
|
||||
DBI_OLDEN = 0x40 /* Handle was closed/reopened outside txn */,
|
||||
DBI_LINDO = 0x80 /* Lazy initialization done for DBI-slot */,
|
||||
@@ -352,6 +352,7 @@ struct MDBX_env {
|
||||
mdbx_filehandle_t dsync_fd, fd4meta;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
HANDLE dxb_lock_event;
|
||||
HANDLE lck_lock_event;
|
||||
#endif /* Windows */
|
||||
osal_mmap_t lck_mmap; /* The lock file */
|
||||
lck_t *lck;
|
||||
@@ -481,7 +482,8 @@ struct MDBX_env {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
osal_srwlock_t remap_guard;
|
||||
/* Workaround for LockFileEx and WriteFile multithread bug */
|
||||
CRITICAL_SECTION windowsbug_lock;
|
||||
CRITICAL_SECTION lck_event_cs;
|
||||
CRITICAL_SECTION dxb_event_cs;
|
||||
char *pathname_char; /* cache of multi-byte representation of pathname
|
||||
to the DB files */
|
||||
#else
|
||||
|
||||
@@ -16,8 +16,11 @@
|
||||
#define LCK_WAITFOR 0
|
||||
#define LCK_DONTWAIT LOCKFILE_FAIL_IMMEDIATELY
|
||||
|
||||
static int flock_with_event(HANDLE fd, HANDLE event, unsigned flags, size_t offset, size_t bytes) {
|
||||
TRACE("lock>>: fd %p, event %p, flags 0x%x offset %zu, bytes %zu >>", fd, event, flags, offset, bytes);
|
||||
static int flock_ex(HANDLE fd, HANDLE event, unsigned flags, size_t offset, size_t bytes, unsigned timeout_ms) {
|
||||
TRACE("lock>>: fd %p, timeout %u ms, event %p, flags 0x%x offset %zu, bytes %zu >>", fd, timeout_ms, event, flags,
|
||||
offset, bytes);
|
||||
assert(timeout_ms == 0 || (event && event != INVALID_HANDLE_VALUE));
|
||||
assert(timeout_ms == 0 || (flags & LCK_DONTWAIT) == 0);
|
||||
OVERLAPPED ov;
|
||||
ov.Internal = 0;
|
||||
ov.InternalHigh = 0;
|
||||
@@ -25,43 +28,43 @@ static int flock_with_event(HANDLE fd, HANDLE event, unsigned flags, size_t offs
|
||||
ov.Offset = (DWORD)offset;
|
||||
ov.OffsetHigh = HIGH_DWORD(offset);
|
||||
|
||||
int retry_left = (flags & LOCKFILE_FAIL_IMMEDIATELY) ? 3 : 0;
|
||||
while (true) {
|
||||
if (LockFileEx(fd, flags, 0, (DWORD)bytes, HIGH_DWORD(bytes), &ov)) {
|
||||
TRACE("lock<<: fd %p, event %p, flags 0x%x offset %zu, bytes %zu << %s", fd, event, flags, offset, bytes, "done");
|
||||
if (LockFileEx(fd, flags, 0, (DWORD)bytes, HIGH_DWORD(bytes), &ov)) {
|
||||
TRACE("lock<<: fd %p, timeout %u ms, event %p, flags 0x%x offset %zu, bytes %zu << %s", fd, timeout_ms, event,
|
||||
flags, offset, bytes, "done");
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
DWORD rc = GetLastError();
|
||||
if (rc == ERROR_IO_PENDING) {
|
||||
if (timeout_ms) {
|
||||
rc = osal_waitstatus2errcode(WaitForSingleObject(event, timeout_ms));
|
||||
if (rc != MDBX_SUCCESS) {
|
||||
if (rc == ERROR_TIMEOUT)
|
||||
rc = ERROR_LOCK_VIOLATION;
|
||||
goto bailout;
|
||||
}
|
||||
}
|
||||
if (GetOverlappedResult(fd, &ov, &rc, true)) {
|
||||
TRACE("lock<<: fd %p, timeout %u ms, event %p, flags 0x%x offset %zu, bytes %zu << %s", fd, timeout_ms, event,
|
||||
flags, offset, bytes, "overlapped-done");
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
DWORD rc = GetLastError();
|
||||
if (rc == ERROR_IO_PENDING) {
|
||||
if (event) {
|
||||
if (GetOverlappedResult(fd, &ov, &rc, true)) {
|
||||
TRACE("lock<<: fd %p, event %p, flags 0x%x offset %zu, bytes %zu << %s", fd, event, flags, offset, bytes,
|
||||
"overlapped-done");
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
rc = GetLastError();
|
||||
} else
|
||||
CancelIo(fd);
|
||||
}
|
||||
|
||||
if (rc != ERROR_LOCK_VIOLATION || --retry_left < 1) {
|
||||
TRACE("lock<<: fd %p, event %p, flags 0x%x offset %zu, bytes %zu << err %d", fd, event, flags, offset, bytes,
|
||||
(int)rc);
|
||||
return (int)rc;
|
||||
}
|
||||
|
||||
SleepEx(0, true);
|
||||
bailout:
|
||||
CancelIo(fd);
|
||||
}
|
||||
|
||||
TRACE("lock<<: fd %p, timeout %u ms, event %p, flags 0x%x offset %zu, bytes %zu << err %d", fd, timeout_ms, event,
|
||||
flags, offset, bytes, (int)rc);
|
||||
return (int)rc;
|
||||
}
|
||||
|
||||
static inline int flock(HANDLE fd, unsigned flags, size_t offset, size_t bytes) {
|
||||
return flock_with_event(fd, 0, flags, offset, bytes);
|
||||
static int flock_lck(const MDBX_env *env, unsigned flags, size_t offset, size_t bytes, unsigned timeout_ms) {
|
||||
return flock_ex(env->lck_mmap.fd, env->lck_lock_event, flags, offset, bytes, timeout_ms);
|
||||
}
|
||||
|
||||
static inline int flock_data(const MDBX_env *env, unsigned flags, size_t offset, size_t bytes) {
|
||||
static int flock_dxb(const MDBX_env *env, unsigned flags, size_t offset, size_t bytes) {
|
||||
const HANDLE fd4data = env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd;
|
||||
return flock_with_event(fd4data, env->dxb_lock_event, flags, offset, bytes);
|
||||
return flock_ex(fd4data, env->dxb_lock_event, flags, offset, bytes, 0);
|
||||
}
|
||||
|
||||
static int funlock(mdbx_filehandle_t fd, size_t offset, size_t bytes) {
|
||||
@@ -84,11 +87,11 @@ static int funlock(mdbx_filehandle_t fd, size_t offset, size_t bytes) {
|
||||
|
||||
int lck_txn_lock(MDBX_env *env, bool dontwait) {
|
||||
if (dontwait) {
|
||||
if (!TryEnterCriticalSection(&env->windowsbug_lock))
|
||||
if (!TryEnterCriticalSection(&env->dxb_event_cs))
|
||||
return MDBX_BUSY;
|
||||
} else {
|
||||
__try {
|
||||
EnterCriticalSection(&env->windowsbug_lock);
|
||||
EnterCriticalSection(&env->dxb_event_cs);
|
||||
} __except ((GetExceptionCode() == 0xC0000194 /* STATUS_POSSIBLE_DEADLOCK / EXCEPTION_POSSIBLE_DEADLOCK */)
|
||||
? EXCEPTION_EXECUTE_HANDLER
|
||||
: EXCEPTION_CONTINUE_SEARCH) {
|
||||
@@ -100,34 +103,32 @@ int lck_txn_lock(MDBX_env *env, bool dontwait) {
|
||||
if (env->flags & MDBX_EXCLUSIVE)
|
||||
goto done;
|
||||
|
||||
const HANDLE fd4data = env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd;
|
||||
int rc = flock_with_event(fd4data, env->dxb_lock_event,
|
||||
dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT) : (LCK_EXCLUSIVE | LCK_WAITFOR), DXB_BODY);
|
||||
int rc = flock_dxb(env, dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT) : (LCK_EXCLUSIVE | LCK_WAITFOR), DXB_BODY);
|
||||
|
||||
if (rc == MDBX_SUCCESS) {
|
||||
done:
|
||||
if (env->basal_txn)
|
||||
env->basal_txn->owner = osal_thread_self();
|
||||
/* Zap: Failing to release lock 'env->windowsbug_lock'
|
||||
/* Zap: Failing to release lock 'env->dxb_event_cs'
|
||||
* in function 'mdbx_txn_lock' */
|
||||
MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(26115);
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
LeaveCriticalSection(&env->windowsbug_lock);
|
||||
LeaveCriticalSection(&env->dxb_event_cs);
|
||||
return (!dontwait || rc != ERROR_LOCK_VIOLATION) ? rc : MDBX_BUSY;
|
||||
}
|
||||
|
||||
void lck_txn_unlock(MDBX_env *env) {
|
||||
eASSERT(env, !env->basal_txn || env->basal_txn->owner == osal_thread_self());
|
||||
if ((env->flags & MDBX_EXCLUSIVE) == 0) {
|
||||
const HANDLE fd4data = env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd;
|
||||
int err = funlock(fd4data, DXB_BODY);
|
||||
int err = funlock(env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd, DXB_BODY);
|
||||
if (err != MDBX_SUCCESS)
|
||||
mdbx_panic("%s failed: err %u", __func__, err);
|
||||
}
|
||||
if (env->basal_txn)
|
||||
env->basal_txn->owner = 0;
|
||||
LeaveCriticalSection(&env->windowsbug_lock);
|
||||
LeaveCriticalSection(&env->dxb_event_cs);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
@@ -142,19 +143,34 @@ void lck_txn_unlock(MDBX_env *env) {
|
||||
#define LCK_UPPER LCK_UP_OFFSET, LCK_UP_LEN
|
||||
|
||||
int lck_rdt_lock(MDBX_env *env) {
|
||||
int rc;
|
||||
imports.srwl_AcquireShared(&env->remap_guard);
|
||||
|
||||
__try {
|
||||
EnterCriticalSection(&env->lck_event_cs);
|
||||
} __except ((GetExceptionCode() == 0xC0000194 /* STATUS_POSSIBLE_DEADLOCK / EXCEPTION_POSSIBLE_DEADLOCK */)
|
||||
? EXCEPTION_EXECUTE_HANDLER
|
||||
: EXCEPTION_CONTINUE_SEARCH) {
|
||||
rc = MDBX_EDEADLK;
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
if (env->lck_mmap.fd == INVALID_HANDLE_VALUE)
|
||||
return MDBX_SUCCESS; /* readonly database in readonly filesystem */
|
||||
goto done; /* readonly database in readonly filesystem */
|
||||
|
||||
/* transition from S-? (used) to S-E (locked),
|
||||
* e.g. exclusive lock upper-part */
|
||||
if (env->flags & MDBX_EXCLUSIVE)
|
||||
return MDBX_SUCCESS;
|
||||
goto done;
|
||||
|
||||
int rc = flock(env->lck_mmap.fd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER);
|
||||
if (rc == MDBX_SUCCESS)
|
||||
rc = flock_lck(env, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER, 0);
|
||||
if (rc == MDBX_SUCCESS) {
|
||||
done:
|
||||
MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(26115);
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
bailout:
|
||||
imports.srwl_ReleaseShared(&env->remap_guard);
|
||||
return rc;
|
||||
}
|
||||
@@ -166,11 +182,12 @@ void lck_rdt_unlock(MDBX_env *env) {
|
||||
if (err != MDBX_SUCCESS)
|
||||
mdbx_panic("%s failed: err %u", __func__, err);
|
||||
}
|
||||
LeaveCriticalSection(&env->lck_event_cs);
|
||||
imports.srwl_ReleaseShared(&env->remap_guard);
|
||||
}
|
||||
|
||||
int osal_lockfile(mdbx_filehandle_t fd, bool wait) {
|
||||
return flock(fd, wait ? LCK_EXCLUSIVE | LCK_WAITFOR : LCK_EXCLUSIVE | LCK_DONTWAIT, 0, DXB_MAXLEN);
|
||||
return flock_ex(fd, 0, wait ? LCK_EXCLUSIVE | LCK_WAITFOR : LCK_EXCLUSIVE | LCK_DONTWAIT, 0, DXB_MAXLEN, 0);
|
||||
}
|
||||
|
||||
static int suspend_and_append(mdbx_handle_array_t **array, const DWORD ThreadId) {
|
||||
@@ -359,15 +376,18 @@ static void lck_unlock(MDBX_env *env) {
|
||||
}
|
||||
}
|
||||
|
||||
#define TIMEOUT_SHORT_MS 121
|
||||
#define TIMEOUT_LONG_MS 900000 /* 15 min */
|
||||
|
||||
/* Seize state as 'exclusive-write' (E-E and returns MDBX_RESULT_TRUE)
|
||||
* or as 'used' (S-? and returns MDBX_RESULT_FALSE).
|
||||
* Otherwise returns an error. */
|
||||
static int internal_seize_lck(HANDLE lfd) {
|
||||
assert(lfd != INVALID_HANDLE_VALUE);
|
||||
static int internal_seize_lck(MDBX_env *env) {
|
||||
assert(env->lck_mmap.fd != INVALID_HANDLE_VALUE);
|
||||
|
||||
/* 1) now on ?-? (free), get ?-E (middle) */
|
||||
jitter4testing(false);
|
||||
int rc = flock(lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER);
|
||||
int rc = flock_lck(env, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER, TIMEOUT_LONG_MS);
|
||||
if (rc != MDBX_SUCCESS) {
|
||||
/* 2) something went wrong, give up */;
|
||||
ERROR("%s, err %u", "?-?(free) >> ?-E(middle)", rc);
|
||||
@@ -376,7 +396,7 @@ static int internal_seize_lck(HANDLE lfd) {
|
||||
|
||||
/* 3) now on ?-E (middle), try E-E (exclusive-write) */
|
||||
jitter4testing(false);
|
||||
rc = flock(lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_LOWER);
|
||||
rc = flock_lck(env, LCK_EXCLUSIVE, LCK_LOWER, TIMEOUT_SHORT_MS);
|
||||
if (rc == MDBX_SUCCESS)
|
||||
return MDBX_RESULT_TRUE /* 4) got E-E (exclusive-write), done */;
|
||||
|
||||
@@ -384,7 +404,7 @@ static int internal_seize_lck(HANDLE lfd) {
|
||||
jitter4testing(false);
|
||||
if (rc != ERROR_SHARING_VIOLATION && rc != ERROR_LOCK_VIOLATION) {
|
||||
/* 6) something went wrong, give up */
|
||||
rc = funlock(lfd, LCK_UPPER);
|
||||
rc = funlock(env->lck_mmap.fd, LCK_UPPER);
|
||||
if (rc != MDBX_SUCCESS)
|
||||
mdbx_panic("%s(%s) failed: err %u", __func__, "?-E(middle) >> ?-?(free)", rc);
|
||||
return rc;
|
||||
@@ -392,7 +412,7 @@ static int internal_seize_lck(HANDLE lfd) {
|
||||
|
||||
/* 7) still on ?-E (middle), try S-E (locked) */
|
||||
jitter4testing(false);
|
||||
rc = flock(lfd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER);
|
||||
rc = flock_lck(env, LCK_SHARED, LCK_LOWER, TIMEOUT_LONG_MS);
|
||||
|
||||
jitter4testing(false);
|
||||
if (rc != MDBX_SUCCESS)
|
||||
@@ -400,7 +420,7 @@ static int internal_seize_lck(HANDLE lfd) {
|
||||
|
||||
/* 8) now on S-E (locked) or still on ?-E (middle),
|
||||
* transition to S-? (used) or ?-? (free) */
|
||||
int err = funlock(lfd, LCK_UPPER);
|
||||
int err = funlock(env->lck_mmap.fd, LCK_UPPER);
|
||||
if (err != MDBX_SUCCESS)
|
||||
mdbx_panic("%s(%s) failed: err %u", __func__, "X-E(locked/middle) >> X-?(used/free)", err);
|
||||
|
||||
@@ -409,8 +429,6 @@ static int internal_seize_lck(HANDLE lfd) {
|
||||
}
|
||||
|
||||
int lck_seize(MDBX_env *env) {
|
||||
const HANDLE fd4data = env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd;
|
||||
assert(fd4data != INVALID_HANDLE_VALUE);
|
||||
if (env->flags & MDBX_EXCLUSIVE)
|
||||
return MDBX_RESULT_TRUE /* nope since files were must be opened
|
||||
non-shareable */
|
||||
@@ -419,13 +437,13 @@ int lck_seize(MDBX_env *env) {
|
||||
if (env->lck_mmap.fd == INVALID_HANDLE_VALUE) {
|
||||
/* LY: without-lck mode (e.g. on read-only filesystem) */
|
||||
jitter4testing(false);
|
||||
int rc = flock_data(env, LCK_SHARED | LCK_DONTWAIT, DXB_WHOLE);
|
||||
int rc = flock_dxb(env, LCK_SHARED | LCK_DONTWAIT, DXB_WHOLE);
|
||||
if (rc != MDBX_SUCCESS)
|
||||
ERROR("%s, err %u", "without-lck", rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int rc = internal_seize_lck(env->lck_mmap.fd);
|
||||
int rc = internal_seize_lck(env);
|
||||
jitter4testing(false);
|
||||
if (rc == MDBX_RESULT_TRUE && (env->flags & MDBX_RDONLY) == 0) {
|
||||
/* Check that another process don't operates in without-lck mode.
|
||||
@@ -434,7 +452,7 @@ int lck_seize(MDBX_env *env) {
|
||||
* - we need an exclusive lock for do so;
|
||||
* - we can't lock meta-pages, otherwise other process could get an error
|
||||
* while opening db in valid (non-conflict) mode. */
|
||||
int err = flock_data(env, LCK_EXCLUSIVE | LCK_DONTWAIT, DXB_WHOLE);
|
||||
int err = flock_dxb(env, LCK_EXCLUSIVE | LCK_DONTWAIT, DXB_WHOLE);
|
||||
if (err != MDBX_SUCCESS) {
|
||||
ERROR("%s, err %u", "lock-against-without-lck", err);
|
||||
jitter4testing(false);
|
||||
@@ -442,7 +460,7 @@ int lck_seize(MDBX_env *env) {
|
||||
return err;
|
||||
}
|
||||
jitter4testing(false);
|
||||
err = funlock(fd4data, DXB_WHOLE);
|
||||
err = funlock(env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd, DXB_WHOLE);
|
||||
if (err != MDBX_SUCCESS)
|
||||
mdbx_panic("%s(%s) failed: err %u", __func__, "unlock-against-without-lck", err);
|
||||
}
|
||||
@@ -451,9 +469,7 @@ int lck_seize(MDBX_env *env) {
|
||||
}
|
||||
|
||||
int lck_downgrade(MDBX_env *env) {
|
||||
const HANDLE fd4data = env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd;
|
||||
/* Transite from exclusive-write state (E-E) to used (S-?) */
|
||||
assert(fd4data != INVALID_HANDLE_VALUE);
|
||||
assert(env->lck_mmap.fd != INVALID_HANDLE_VALUE);
|
||||
|
||||
if (env->flags & MDBX_EXCLUSIVE)
|
||||
@@ -465,7 +481,7 @@ int lck_downgrade(MDBX_env *env) {
|
||||
mdbx_panic("%s(%s) failed: err %u", __func__, "E-E(exclusive-write) >> ?-E(middle)", rc);
|
||||
|
||||
/* 2) now at ?-E (middle), transition to S-E (locked) */
|
||||
rc = flock(env->lck_mmap.fd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER);
|
||||
rc = flock_lck(env, LCK_SHARED, LCK_LOWER, TIMEOUT_LONG_MS);
|
||||
if (rc != MDBX_SUCCESS) {
|
||||
/* 3) something went wrong, give up */;
|
||||
ERROR("%s, err %u", "?-E(middle) >> S-E(locked)", rc);
|
||||
@@ -490,7 +506,7 @@ int lck_upgrade(MDBX_env *env, bool dont_wait) {
|
||||
|
||||
/* 1) now on S-? (used), try S-E (locked) */
|
||||
jitter4testing(false);
|
||||
int rc = flock(env->lck_mmap.fd, dont_wait ? LCK_EXCLUSIVE | LCK_DONTWAIT : LCK_EXCLUSIVE, LCK_UPPER);
|
||||
int rc = flock_lck(env, dont_wait ? LCK_EXCLUSIVE | LCK_DONTWAIT : LCK_EXCLUSIVE, LCK_UPPER, 0);
|
||||
if (rc != MDBX_SUCCESS) {
|
||||
/* 2) something went wrong, give up */;
|
||||
VERBOSE("%s, err %u", "S-?(used) >> S-E(locked)", rc);
|
||||
@@ -504,7 +520,7 @@ int lck_upgrade(MDBX_env *env, bool dont_wait) {
|
||||
|
||||
/* 4) now on ?-E (middle), try E-E (exclusive-write) */
|
||||
jitter4testing(false);
|
||||
rc = flock(env->lck_mmap.fd, dont_wait ? LCK_EXCLUSIVE | LCK_DONTWAIT : LCK_EXCLUSIVE, LCK_LOWER);
|
||||
rc = flock_lck(env, dont_wait ? LCK_EXCLUSIVE | LCK_DONTWAIT : LCK_EXCLUSIVE, LCK_LOWER, 0);
|
||||
if (rc != MDBX_SUCCESS) {
|
||||
/* 5) something went wrong, give up */;
|
||||
VERBOSE("%s, err %u", "?-E(middle) >> E-E(exclusive-write)", rc);
|
||||
@@ -547,7 +563,7 @@ int lck_destroy(MDBX_env *env, MDBX_env *inprocess_neighbor, const uint32_t curr
|
||||
if (synced && !inprocess_neighbor && env->lck_mmap.fd != INVALID_HANDLE_VALUE &&
|
||||
lck_upgrade(env, true) == MDBX_SUCCESS)
|
||||
/* this will fail if LCK is used/mmapped by other process(es) */
|
||||
osal_ftruncate(env->lck_mmap.fd, 0);
|
||||
osal_fsetsize(env->lck_mmap.fd, 0);
|
||||
}
|
||||
lck_unlock(env);
|
||||
return MDBX_SUCCESS;
|
||||
|
||||
@@ -86,7 +86,10 @@ MDBX_INTERNAL void debug_log_va(int level, const char *function, int line, const
|
||||
#if MDBX_DEBUG
|
||||
#define ASSERT_FAIL(env, msg, func, line) mdbx_assert_fail(env, msg, func, line)
|
||||
#else /* MDBX_DEBUG */
|
||||
MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func, unsigned line);
|
||||
#if !((defined(_WIN32) || defined(_WIN64)) && defined(_DEBUG) && !MDBX_WITHOUT_MSVC_CRT)
|
||||
MDBX_NORETURN
|
||||
#endif
|
||||
__cold void assert_fail(const char *msg, const char *func, unsigned line);
|
||||
#define ASSERT_FAIL(env, msg, func, line) \
|
||||
do { \
|
||||
(void)(env); \
|
||||
|
||||
@@ -189,7 +189,7 @@
|
||||
|
||||
/** Avoid dependence from MSVC CRT and use ntdll.dll instead. */
|
||||
#ifndef MDBX_WITHOUT_MSVC_CRT
|
||||
#if defined(MDBX_BUILD_CXX) && !MDBX_BUILD_CXX
|
||||
#if defined(MDBX_BUILD_CXX) && !MDBX_BUILD_CXX && (defined(_WIN32) || defined(_WIN64))
|
||||
#define MDBX_WITHOUT_MSVC_CRT 1
|
||||
#else
|
||||
#define MDBX_WITHOUT_MSVC_CRT 0
|
||||
@@ -303,7 +303,8 @@
|
||||
((defined(_POSIX_THREAD_ROBUST_PRIO_INHERIT) && _POSIX_THREAD_ROBUST_PRIO_INHERIT > 0) || \
|
||||
(defined(_POSIX_THREAD_ROBUST_PRIO_PROTECT) && _POSIX_THREAD_ROBUST_PRIO_PROTECT > 0) || \
|
||||
defined(PTHREAD_MUTEX_ROBUST) || defined(PTHREAD_MUTEX_ROBUST_NP)) && \
|
||||
(!defined(__GLIBC__) || __GLIBC_PREREQ(2, 10) /* troubles with Robust mutexes before 2.10 */)
|
||||
(!defined(__GLIBC__) || __GLIBC_PREREQ(2, 10) /* troubles with Robust mutexes before 2.10 */) && \
|
||||
!defined(__OHOS__) /* Harmony OS doesn't support robust mutexes at the end of 2025 */
|
||||
#define MDBX_LOCKING MDBX_LOCKING_POSIX2008
|
||||
#else
|
||||
#define MDBX_LOCKING MDBX_LOCKING_POSIX2001
|
||||
@@ -358,6 +359,22 @@
|
||||
#error MDBX_USE_COPYFILERANGE must be defined as 0 or 1
|
||||
#endif /* MDBX_USE_COPYFILERANGE */
|
||||
|
||||
/** Advanced: Using posix_fallocate() or fcntl(F_PREALLOCATE) on OSX (autodetection by default). */
|
||||
#ifndef MDBX_USE_FALLOCATE
|
||||
#if defined(__APPLE__)
|
||||
#define MDBX_USE_FALLOCATE 0 /* Too slow and unclean, but not required to prevent SIGBUS */
|
||||
#elif (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L) || (__GLIBC_PREREQ(2, 10) && defined(_GNU_SOURCE))
|
||||
#define MDBX_USE_FALLOCATE 1
|
||||
#else
|
||||
#define MDBX_USE_FALLOCATE 0
|
||||
#endif
|
||||
#define MDBX_USE_FALLOCATE_CONFIG "AUTO=" MDBX_STRINGIFY(MDBX_USE_FALLOCATE)
|
||||
#elif !(MDBX_USE_FALLOCATE == 0 || MDBX_USE_FALLOCATE == 1)
|
||||
#error MDBX_USE_FALLOCATE must be defined as 0 or 1
|
||||
#else
|
||||
#define MDBX_USE_FALLOCATE_CONFIG MDBX_STRINGIFY(MDBX_USE_FALLOCATE)
|
||||
#endif /* MDBX_USE_FALLOCATE */
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#ifndef MDBX_CPU_WRITEBACK_INCOHERENT
|
||||
|
||||
176
src/osal.c
176
src/osal.c
@@ -14,23 +14,6 @@
|
||||
#include <crtdbg.h>
|
||||
#endif
|
||||
|
||||
static int waitstatus2errcode(DWORD result) {
|
||||
switch (result) {
|
||||
case WAIT_OBJECT_0:
|
||||
return MDBX_SUCCESS;
|
||||
case WAIT_FAILED:
|
||||
return (int)GetLastError();
|
||||
case WAIT_ABANDONED:
|
||||
return ERROR_ABANDONED_WAIT_0;
|
||||
case WAIT_IO_COMPLETION:
|
||||
return ERROR_USER_APC;
|
||||
case WAIT_TIMEOUT:
|
||||
return ERROR_TIMEOUT;
|
||||
default:
|
||||
return ERROR_UNHANDLED_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
/* Map a result from an NTAPI call to WIN32 error code. */
|
||||
static int ntstatus2errcode(NTSTATUS status) {
|
||||
DWORD dummy;
|
||||
@@ -205,7 +188,7 @@ __cold void mdbx_assert_fail(const MDBX_env *env, const char *msg, const char *f
|
||||
assert_fail(msg, func, line);
|
||||
}
|
||||
|
||||
MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func, unsigned line) {
|
||||
__cold void assert_fail(const char *msg, const char *func, unsigned line) {
|
||||
#endif /* MDBX_DEBUG */
|
||||
|
||||
if (globals.logger.ptr)
|
||||
@@ -224,13 +207,19 @@ MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func, unsigne
|
||||
|
||||
while (1) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#if !MDBX_WITHOUT_MSVC_CRT && defined(_DEBUG)
|
||||
_CrtDbgReport(_CRT_ASSERT, func ? func : "unknown", line, "libmdbx", "assertion failed: %s", msg);
|
||||
#if defined(_DEBUG) && !MDBX_WITHOUT_MSVC_CRT
|
||||
if (_CrtDbgReport(_CRT_ASSERT, func ? func : "unknown", line, "libmdbx", "assertion failed: %s", msg) == 0)
|
||||
return /* user chooses the "Continue" button */;
|
||||
else {
|
||||
/* user chooses the "Retry" button */
|
||||
if (IsDebuggerPresent())
|
||||
DebugBreak();
|
||||
}
|
||||
#else
|
||||
if (IsDebuggerPresent())
|
||||
DebugBreak();
|
||||
#endif
|
||||
FatalExit(STATUS_ASSERTION_FAILURE);
|
||||
#endif
|
||||
#else
|
||||
abort();
|
||||
#endif
|
||||
@@ -252,14 +241,14 @@ __cold void mdbx_panic(const char *fmt, ...) {
|
||||
|
||||
while (1) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#if !MDBX_WITHOUT_MSVC_CRT && defined(_DEBUG)
|
||||
#if defined(_DEBUG) && !MDBX_WITHOUT_MSVC_CRT
|
||||
_CrtDbgReport(_CRT_ASSERT, "mdbx.c", 0, "libmdbx", "panic: %s", const_message);
|
||||
#else
|
||||
OutputDebugStringA("\r\nMDBX-PANIC: ");
|
||||
OutputDebugStringA(const_message);
|
||||
#endif
|
||||
if (IsDebuggerPresent())
|
||||
DebugBreak();
|
||||
#endif
|
||||
FatalExit(ERROR_UNHANDLED_ERROR);
|
||||
#else
|
||||
__assert_fail(const_message, "mdbx-panic", 0, const_message);
|
||||
@@ -414,7 +403,7 @@ int osal_condpair_destroy(osal_condpair_t *condpair) {
|
||||
int osal_condpair_lock(osal_condpair_t *condpair) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
DWORD code = WaitForSingleObject(condpair->mutex, INFINITE);
|
||||
return waitstatus2errcode(code);
|
||||
return osal_waitstatus2errcode(code);
|
||||
#else
|
||||
return osal_pthread_mutex_lock(&condpair->mutex);
|
||||
#endif
|
||||
@@ -444,7 +433,7 @@ int osal_condpair_wait(osal_condpair_t *condpair, bool part) {
|
||||
if (code == WAIT_OBJECT_0)
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
return waitstatus2errcode(code);
|
||||
return osal_waitstatus2errcode(code);
|
||||
#else
|
||||
return pthread_cond_wait(&condpair->cond[part], &condpair->mutex);
|
||||
#endif
|
||||
@@ -1186,7 +1175,7 @@ int osal_openfile(const enum osal_openfile_purpose purpose, const MDBX_env *env,
|
||||
case MDBX_OPEN_LCK:
|
||||
CreationDisposition = OPEN_ALWAYS;
|
||||
DesiredAccess |= GENERIC_READ | GENERIC_WRITE;
|
||||
FlagsAndAttributes |= FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_TEMPORARY;
|
||||
FlagsAndAttributes |= FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_OVERLAPPED;
|
||||
break;
|
||||
case MDBX_OPEN_DXB_READ:
|
||||
CreationDisposition = OPEN_EXISTING;
|
||||
@@ -1594,8 +1583,7 @@ int osal_is_pipe(mdbx_filehandle_t fd) {
|
||||
#endif
|
||||
}
|
||||
|
||||
/* truncate file: just set the length of a file */
|
||||
int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length) {
|
||||
int osal_fsetsize(mdbx_filehandle_t fd, const uint64_t length) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
if (imports.SetFileInformationByHandle) {
|
||||
FILE_END_OF_FILE_INFO EndOfFileInfo;
|
||||
@@ -1610,31 +1598,58 @@ int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length) {
|
||||
}
|
||||
#else
|
||||
STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), "libmdbx requires 64-bit file I/O on 64-bit systems");
|
||||
return ftruncate(fd, length) == 0 ? MDBX_SUCCESS : errno;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* extend file: set the length of a file AND ensure the space has been allocated */
|
||||
int osal_fallocate(mdbx_filehandle_t fd, uint64_t length) {
|
||||
assert(length > 0);
|
||||
int err = MDBX_RESULT_TRUE;
|
||||
#if (defined(__linux__) || defined(__gnu_linux__)) && \
|
||||
((defined(_GNU_SOURCE) && __GLIBC_PREREQ(2, 10)) || (defined(__ANDROID_API__) && __ANDROID_API__ >= 21))
|
||||
err = fallocate(fd, 0, 0, length) ? ignore_enosys_and_eremote(errno) : MDBX_SUCCESS;
|
||||
#elif defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L && !defined(__APPLE__)
|
||||
err = posix_fallocate(fd, 0, length) ? ignore_enosys_and_eremote(errno) : MDBX_SUCCESS;
|
||||
#elif defined(__APPLE__)
|
||||
fstore_t store = {F_ALLOCATEALL, F_PEOFPOSMODE, 0, length, 0};
|
||||
if (fcntl(fd, F_PREALLOCATE, &store))
|
||||
err = ignore_enosys_and_eremote(errno);
|
||||
#endif /* Apple */
|
||||
#if !defined(_WIN32) && !defined(_WIN64)
|
||||
/* Workaround for testing: ignore ENOSPC for TMPFS/RAMFS.
|
||||
* This is insignificant for production, but it helps in some tests using /dev/shm inside docker/containers. */
|
||||
if (err == ENOSPC && osal_check_fs_incore(fd) == MDBX_RESULT_TRUE)
|
||||
err = MDBX_RESULT_TRUE;
|
||||
#if MDBX_USE_FALLOCATE
|
||||
struct stat info;
|
||||
if (unlikely(fstat(fd, &info)))
|
||||
return errno;
|
||||
|
||||
const uint64_t allocated = UINT64_C(512) * info.st_blocks;
|
||||
if (length > allocated) {
|
||||
#if defined(__APPLE__)
|
||||
fstore_t store = {
|
||||
.fst_flags = F_ALLOCATECONTIG, .fst_posmode = F_PEOFPOSMODE, .fst_offset = 0, .fst_length = length};
|
||||
int err = MDBX_SUCCESS;
|
||||
if (fcntl(fd, F_PREALLOCATE, &store)) {
|
||||
/* TODO: implement step-by-step allocation in chunks of 16384, 8192, 4094, 2048, 1024 Kb */
|
||||
store.fst_flags = F_ALLOCATEALL;
|
||||
if (fcntl(fd, F_PREALLOCATE, &store))
|
||||
err = errno;
|
||||
}
|
||||
#elif defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L
|
||||
const int err = posix_fallocate(fd, 0, length);
|
||||
if (!err && length > (uint64_t)info.st_size)
|
||||
info.st_size = length /* posix_fallocate() extends the file */;
|
||||
#else
|
||||
const int err = fallocate(fd, 0, 0, length) ? errno : MDBX_SUCCESS;
|
||||
if (!err && length > (uint64_t)info.st_size)
|
||||
info.st_size = length /* fallocate() extends the file */;
|
||||
#endif
|
||||
if (unlikely(err) && ignore_enosys_and_eremote(err) != MDBX_RESULT_TRUE) {
|
||||
/* Workaround for testing: ignore ENOSPC for TMPFS/RAMFS.
|
||||
* This is insignificant for production, but it helps in some tests using /dev/shm inside docker/containers. */
|
||||
if (err != ENOSPC || osal_check_fs_incore(fd) != MDBX_RESULT_TRUE)
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
if (length == (uint64_t)info.st_size)
|
||||
return MDBX_SUCCESS;
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) || defined(__gnu_linux__)
|
||||
if (globals.linux_kernel_version < 0x05110000 && globals.linux_kernel_version >= 0x050a0000) {
|
||||
struct statfs statfs_info;
|
||||
if (fstatfs(fd, &statfs_info))
|
||||
return errno;
|
||||
if (statfs_info.f_type == 0xEF53 /* EXT4_SUPER_MAGIC */ && unlikely(fdatasync(fd)))
|
||||
return errno;
|
||||
}
|
||||
#endif /* Linux */
|
||||
|
||||
return unlikely(ftruncate(fd, length)) ? errno : MDBX_SUCCESS;
|
||||
|
||||
#endif /* !Windows */
|
||||
return (err == MDBX_RESULT_TRUE) ? osal_ftruncate(fd, length) : err;
|
||||
}
|
||||
|
||||
int osal_fseek(mdbx_filehandle_t fd, uint64_t pos) {
|
||||
@@ -1662,7 +1677,7 @@ int osal_thread_create(osal_thread_t *thread, THREAD_RESULT(THREAD_CALL *start_r
|
||||
int osal_thread_join(osal_thread_t thread) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
DWORD code = WaitForSingleObject(thread, INFINITE);
|
||||
return waitstatus2errcode(code);
|
||||
return osal_waitstatus2errcode(code);
|
||||
#else
|
||||
void *unused_retval = &unused_retval;
|
||||
return pthread_join(thread, &unused_retval);
|
||||
@@ -2087,8 +2102,8 @@ int osal_mmap(const int flags, osal_mmap_t *map, size_t size, const size_t limit
|
||||
return err;
|
||||
|
||||
if ((flags & MDBX_RDONLY) == 0 && (options & MMAP_OPTION_SETLENGTH) != 0) {
|
||||
err = osal_fallocate(map->fd, size);
|
||||
VERBOSE("ftruncate %zu, err %d", size, err);
|
||||
err = osal_fsetsize(map->fd, size);
|
||||
VERBOSE("osal_fsetsize %zu, err %d", size, err);
|
||||
if (err != MDBX_SUCCESS)
|
||||
return err;
|
||||
map->filesize = size;
|
||||
@@ -2333,7 +2348,7 @@ retry_file_and_section:
|
||||
}
|
||||
|
||||
if ((flags & MDBX_RDONLY) == 0 && map->filesize != size) {
|
||||
err = osal_fallocate(map->fd, size);
|
||||
err = osal_fsetsize(map->fd, size);
|
||||
if (err == MDBX_SUCCESS)
|
||||
map->filesize = size;
|
||||
/* ignore error, because Windows unable shrink file
|
||||
@@ -2413,11 +2428,11 @@ retry_mapview:;
|
||||
} else {
|
||||
if (map->filesize != size) {
|
||||
if (size > map->filesize) {
|
||||
rc = osal_fallocate(map->fd, size);
|
||||
VERBOSE("f%s-%s %zu, err %d", "allocate", "extend", size, rc);
|
||||
rc = osal_fsetsize(map->fd, size);
|
||||
VERBOSE("osal_fsetsize-%s %zu, err %d", "extend", size, rc);
|
||||
} else if (flags & txn_shrink_allowed) {
|
||||
rc = osal_ftruncate(map->fd, size);
|
||||
VERBOSE("f%s-%s %zu, err %d", "truncate", "shrink", size, rc);
|
||||
rc = osal_fsetsize(map->fd, size);
|
||||
VERBOSE("osal_fsetsize-%s %zu, err %d", "shrink", size, rc);
|
||||
}
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
@@ -3458,6 +3473,35 @@ bin128_t osal_guid(const MDBX_env *env) {
|
||||
return uuid;
|
||||
}
|
||||
|
||||
/* Looks up the environment variable `name`.
 *
 * Returns a pointer to the value, an empty string for a variable that is set
 * but empty (Windows branch), or nullptr when the variable is not found or
 * could not be read.
 *
 * `secure` is honoured only when built with _GNU_SOURCE against glibc >= 2.17,
 * where secure_getenv() is used instead of getenv(); elsewhere it is ignored.
 *
 * On Windows the value is copied into a function-local static buffer, so the
 * returned pointer is overwritten by the next call and values that do not fit
 * into that 42-byte buffer are reported as nullptr. */
const char *osal_getenv(const char *name, bool secure) {
  (void)secure;
#if defined(_WIN32) || defined(_WIN64)
  static char buf[42];
  /* Plant a sentinel error code: a zero-length result is ambiguous, so the
   * last-error value is inspected afterwards to tell "set but empty"
   * (sentinel still in place) apart from a real failure. */
  SetLastError(ERROR_OUT_OF_PAPER);
  const size_t len = GetEnvironmentVariableA(name, buf, sizeof(buf));
  if (len >= sizeof(buf))
    /* the value does not fit into the buffer, no idea how to handle */
    return nullptr;
  if (len != 0)
    return buf;
  /* Zero length: the untouched sentinel means an empty value; anything else
   * (ERROR_ENVVAR_NOT_FOUND or any other error) is reported as "not found". */
  return (GetLastError() == ERROR_OUT_OF_PAPER) ? "" : nullptr;
#else
#if defined(_GNU_SOURCE) && __GLIBC_PREREQ(2, 17)
  if (secure)
    return secure_getenv(name);
#endif /* glibc >= 2.17 */
  return getenv(name);
#endif
}
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
void osal_ctor(void) {
|
||||
@@ -3472,14 +3516,26 @@ void osal_ctor(void) {
|
||||
SYSTEM_INFO si;
|
||||
GetSystemInfo(&si);
|
||||
globals.sys_pagesize = si.dwPageSize;
|
||||
globals.sys_allocation_granularity = si.dwAllocationGranularity;
|
||||
globals.sys_allocation_granularity =
|
||||
(si.dwAllocationGranularity > globals.sys_pagesize) ? si.dwAllocationGranularity : globals.sys_pagesize;
|
||||
#else
|
||||
globals.sys_pagesize = sysconf(_SC_PAGE_SIZE);
|
||||
globals.sys_allocation_granularity = (MDBX_WORDBITS > 32) ? 65536 : 16384;
|
||||
globals.sys_allocation_granularity = (globals.sys_allocation_granularity >= globals.sys_pagesize * 2)
|
||||
? globals.sys_allocation_granularity
|
||||
: globals.sys_pagesize * 4;
|
||||
#ifdef AT_UCACHEBSIZE
|
||||
const size_t unified_cache_block_size = getauxval(AT_UCACHEBSIZE);
|
||||
globals.sys_unified_cache_block = globals.sys_pagesize;
|
||||
if (unified_cache_block_size > 0 && unified_cache_block_size < INT_MAX) {
|
||||
globals.sys_unified_cache_block = (unsigned)unified_cache_block_size;
|
||||
if (globals.sys_unified_cache_block > globals.sys_pagesize)
|
||||
globals.sys_allocation_granularity = globals.sys_unified_cache_block;
|
||||
}
|
||||
#endif /* AT_UCACHEBSIZE */
|
||||
#endif
|
||||
if (globals.sys_allocation_granularity > 4 * MEGABYTE && globals.sys_pagesize < MEGABYTE)
|
||||
globals.sys_allocation_granularity = 4 * MEGABYTE;
|
||||
assert(globals.sys_pagesize > 0 && (globals.sys_pagesize & (globals.sys_pagesize - 1)) == 0);
|
||||
assert(globals.sys_allocation_granularity >= globals.sys_pagesize &&
|
||||
globals.sys_allocation_granularity % globals.sys_pagesize == 0);
|
||||
|
||||
31
src/osal.h
31
src/osal.h
@@ -133,6 +133,14 @@ static inline void osal_free(void *ptr) { HeapFree(GetProcessHeap(), 0, ptr); }
|
||||
#define vsnprintf _vsnprintf /* ntdll */
|
||||
#endif
|
||||
|
||||
#ifndef strcasecmp
|
||||
#define strcasecmp _stricmp /* ntdll */
|
||||
#endif
|
||||
|
||||
#ifndef strncasecmp
|
||||
#define strncasecmp _strnicmp /* ntdll */
|
||||
#endif
|
||||
|
||||
#else /*----------------------------------------------------------------------*/
|
||||
|
||||
typedef pthread_t osal_thread_t;
|
||||
@@ -171,7 +179,7 @@ typedef char pathchar_t;
|
||||
#define MDBX_PRIsPATH "s"
|
||||
#endif
|
||||
|
||||
static inline bool osal_yield(void) {
|
||||
MDBX_MAYBE_UNUSED static inline bool osal_yield(void) {
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
return SleepEx(0, true) == WAIT_IO_COMPLETION;
|
||||
#else
|
||||
@@ -198,6 +206,23 @@ typedef struct osal_mmap {
|
||||
|
||||
#define MDBX_HAVE_PWRITEV 0
|
||||
|
||||
/* Translates the result of a Win32 wait call (e.g. WaitForSingleObject)
 * into an error code: MDBX_SUCCESS when the object was signaled, otherwise
 * the matching Win32 error code. Unrecognized results are reported as
 * ERROR_UNHANDLED_ERROR. */
static inline int osal_waitstatus2errcode(DWORD result) {
  if (result == WAIT_OBJECT_0)
    return MDBX_SUCCESS;
  if (result == WAIT_FAILED)
    return (int)GetLastError(); /* the wait itself failed, take the OS error */
  if (result == WAIT_ABANDONED)
    return ERROR_ABANDONED_WAIT_0;
  if (result == WAIT_IO_COMPLETION)
    return ERROR_USER_APC;
  if (result == WAIT_TIMEOUT)
    return ERROR_TIMEOUT;
  return ERROR_UNHANDLED_ERROR;
}
|
||||
|
||||
#elif defined(__ANDROID_API__)
|
||||
|
||||
#if __ANDROID_API__ < 24
|
||||
@@ -441,8 +466,7 @@ enum osal_syncmode_bits {
|
||||
};
|
||||
|
||||
MDBX_INTERNAL int osal_fsync(mdbx_filehandle_t fd, const enum osal_syncmode_bits mode_bits);
|
||||
MDBX_INTERNAL int osal_ftruncate(mdbx_filehandle_t fd, uint64_t length);
|
||||
MDBX_INTERNAL int osal_fallocate(mdbx_filehandle_t fd, uint64_t length);
|
||||
MDBX_INTERNAL int osal_fsetsize(mdbx_filehandle_t fd, const uint64_t length);
|
||||
MDBX_INTERNAL int osal_fseek(mdbx_filehandle_t fd, uint64_t pos);
|
||||
MDBX_INTERNAL int osal_filesize(mdbx_filehandle_t fd, uint64_t *length);
|
||||
|
||||
@@ -468,6 +492,7 @@ MDBX_MAYBE_UNUSED static inline bool osal_isdirsep(pathchar_t c) {
|
||||
c == '/';
|
||||
}
|
||||
|
||||
MDBX_INTERNAL const char *osal_getenv(const char *name, bool secure);
|
||||
MDBX_INTERNAL bool osal_pathequal(const pathchar_t *l, const pathchar_t *r, size_t len);
|
||||
MDBX_INTERNAL pathchar_t *osal_fileext(const pathchar_t *pathname, size_t len);
|
||||
MDBX_INTERNAL int osal_fileexists(const pathchar_t *pathname);
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
#if defined(_WIN32) || defined(_WIN64) || defined(_WINDOWS)
|
||||
|
||||
#ifndef _WIN32_WINNT
|
||||
#define _WIN32_WINNT 0x0601 /* Windows 7 */
|
||||
#define _WIN32_WINNT 0x0A00 /* Windows 10 */
|
||||
#endif /* _WIN32_WINNT */
|
||||
|
||||
#if !defined(_CRT_SECURE_NO_WARNINGS)
|
||||
@@ -428,6 +428,10 @@ __extern_C key_t ftok(const char *, int);
|
||||
#include <sys/time.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#if __GLIBC_PREREQ(2, 16) || __has_include(<sys/auxv.h>)
|
||||
#include <sys/auxv.h>
|
||||
#endif /* glibc >= 2.16 */
|
||||
|
||||
#endif /*---------------------------------------------------------------------*/
|
||||
|
||||
#if defined(__ANDROID_API__) || defined(ANDROID)
|
||||
|
||||
@@ -88,7 +88,7 @@ MDBX_INTERNAL int txn_ro_end(MDBX_txn *txn, unsigned mode);
|
||||
|
||||
/* env.c */
|
||||
MDBX_INTERNAL int env_open(MDBX_env *env, mdbx_mode_t mode);
|
||||
MDBX_INTERNAL int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, size_t bytes, troika_t *troika);
|
||||
MDBX_INTERNAL int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, troika_t *troika);
|
||||
MDBX_INTERNAL int env_sync(MDBX_env *env, bool force, bool nonblock);
|
||||
MDBX_INTERNAL int env_close(MDBX_env *env, bool resurrect_after_fork);
|
||||
MDBX_INTERNAL MDBX_txn *env_owned_wrtxn(const MDBX_env *env);
|
||||
@@ -109,8 +109,12 @@ MDBX_INTERNAL void recalculate_merge_thresholds(MDBX_env *env);
|
||||
MDBX_INTERNAL void recalculate_subpage_thresholds(MDBX_env *env);
|
||||
|
||||
/* table.c */
|
||||
MDBX_INTERNAL int __must_check_result tbl_fetch(MDBX_txn *txn, size_t dbi);
|
||||
MDBX_INTERNAL int __must_check_result tbl_fetch(MDBX_txn *txn, MDBX_cursor *mc, size_t dbi, const MDBX_val *name,
|
||||
unsigned wanna_flags);
|
||||
MDBX_INTERNAL int __must_check_result tbl_create(MDBX_txn *txn, MDBX_cursor *mc, size_t slot, const MDBX_val *name,
|
||||
unsigned db_flags);
|
||||
MDBX_INTERNAL int __must_check_result tbl_setup(const MDBX_env *env, volatile kvx_t *const kvx, const tree_t *const db);
|
||||
MDBX_INTERNAL int __must_check_result tbl_refresh(MDBX_txn *txn, size_t dbi);
|
||||
|
||||
/* coherency.c */
|
||||
MDBX_INTERNAL bool coherency_check_meta(const MDBX_env *env, const volatile meta_t *meta, bool report);
|
||||
|
||||
118
src/table.c
118
src/table.c
@@ -37,67 +37,99 @@ int tbl_setup(const MDBX_env *env, volatile kvx_t *const kvx, const tree_t *cons
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
int tbl_fetch(MDBX_txn *txn, size_t dbi) {
|
||||
cursor_couple_t couple;
|
||||
int rc = cursor_init(&couple.outer, txn, MAIN_DBI);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
int tbl_fetch(MDBX_txn *txn, MDBX_cursor *mc, size_t dbi, const MDBX_val *name, unsigned wanna_flags) {
|
||||
int err = cursor_init(mc, txn, MAIN_DBI);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
|
||||
kvx_t *const kvx = &txn->env->kvs[dbi];
|
||||
rc = tree_search(&couple.outer, &kvx->name, 0);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
bailout:
|
||||
NOTICE("dbi %zu refs to inaccessible table `%.*s` for txn %" PRIaTXN " (err %d)", dbi, (int)kvx->name.iov_len,
|
||||
(const char *)kvx->name.iov_base, txn->txnid, rc);
|
||||
return (rc == MDBX_NOTFOUND) ? MDBX_BAD_DBI : rc;
|
||||
err = tree_search(mc, name, 0);
|
||||
if (unlikely(err != MDBX_SUCCESS)) {
|
||||
if (err == MDBX_NOTFOUND)
|
||||
goto notfound;
|
||||
return err;
|
||||
}
|
||||
|
||||
struct node_search_result nsr = node_search(mc, name);
|
||||
if (unlikely(!nsr.exact)) {
|
||||
notfound:
|
||||
if (dbi < txn->env->n_dbi && (txn->env->dbs_flags[dbi] & DB_VALID) && !(wanna_flags & MDBX_CREATE))
|
||||
NOTICE("dbi %zu refs to non-existing table `%.*s` for txn %" PRIaTXN " (err %d)", dbi, (int)name->iov_len,
|
||||
(const char *)name->iov_base, txn->txnid, err);
|
||||
return MDBX_NOTFOUND;
|
||||
}
|
||||
|
||||
if (unlikely((node_flags(nsr.node) & (N_DUP | N_TREE)) != N_TREE)) {
|
||||
NOTICE("dbi %zu refs to not a named table `%.*s` for txn %" PRIaTXN " (%s)", dbi, (int)name->iov_len,
|
||||
(const char *)name->iov_base, txn->txnid, "wrong node-flags");
|
||||
return MDBX_INCOMPATIBLE /* not a named DB */;
|
||||
}
|
||||
|
||||
MDBX_val data;
|
||||
struct node_search_result nsr = node_search(&couple.outer, &kvx->name);
|
||||
if (unlikely(!nsr.exact)) {
|
||||
rc = MDBX_NOTFOUND;
|
||||
goto bailout;
|
||||
}
|
||||
if (unlikely((node_flags(nsr.node) & (N_DUP | N_TREE)) != N_TREE)) {
|
||||
NOTICE("dbi %zu refs to not a named table `%.*s` for txn %" PRIaTXN " (%s)", dbi, (int)kvx->name.iov_len,
|
||||
(const char *)kvx->name.iov_base, txn->txnid, "wrong flags");
|
||||
return MDBX_INCOMPATIBLE; /* not a named DB */
|
||||
err = node_read(mc, nsr.node, &data, mc->pg[mc->top]);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
|
||||
if (unlikely(data.iov_len < sizeof(tree_t))) {
|
||||
NOTICE("dbi %zu refs to not a named table `%.*s` for txn %" PRIaTXN " (%s)", dbi, (int)name->iov_len,
|
||||
(const char *)name->iov_base, txn->txnid, "wrong record-size");
|
||||
return MDBX_INCOMPATIBLE /* not a named DB */;
|
||||
}
|
||||
|
||||
rc = node_read(&couple.outer, nsr.node, &data, couple.outer.pg[couple.outer.top]);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
|
||||
if (unlikely(data.iov_len != sizeof(tree_t))) {
|
||||
NOTICE("dbi %zu refs to not a named table `%.*s` for txn %" PRIaTXN " (%s)", dbi, (int)kvx->name.iov_len,
|
||||
(const char *)kvx->name.iov_base, txn->txnid, "wrong rec-size");
|
||||
return MDBX_INCOMPATIBLE; /* not a named DB */
|
||||
}
|
||||
|
||||
uint16_t flags = UNALIGNED_PEEK_16(data.iov_base, tree_t, flags);
|
||||
const unsigned db_flags = UNALIGNED_PEEK_16(data.iov_base, tree_t, flags);
|
||||
const pgno_t db_root_pgno = peek_pgno(ptr_disp(data.iov_base, offsetof(tree_t, root)));
|
||||
/* The txn may not know this DBI, or another process may
|
||||
* have dropped and recreated the DB with other flags. */
|
||||
tree_t *const db = &txn->dbs[dbi];
|
||||
if (unlikely((db->flags & DB_PERSISTENT_FLAGS) != flags)) {
|
||||
if (unlikely((wanna_flags ^ db_flags) & DB_PERSISTENT_FLAGS) && !(wanna_flags & MDBX_DB_ACCEDE) &&
|
||||
!((wanna_flags & MDBX_CREATE) && db_root_pgno == P_INVALID)) {
|
||||
NOTICE("dbi %zu refs to the re-created table `%.*s` for txn %" PRIaTXN
|
||||
" with different flags (present 0x%X != wanna 0x%X)",
|
||||
dbi, (int)kvx->name.iov_len, (const char *)kvx->name.iov_base, txn->txnid, db->flags & DB_PERSISTENT_FLAGS,
|
||||
flags);
|
||||
return MDBX_INCOMPATIBLE;
|
||||
dbi, (int)name->iov_len, (const char *)name->iov_base, txn->txnid, db_flags & DB_PERSISTENT_FLAGS,
|
||||
wanna_flags & DB_PERSISTENT_FLAGS);
|
||||
return MDBX_INCOMPATIBLE /* not a named DB */;
|
||||
}
|
||||
|
||||
tree_t *const db = &txn->dbs[dbi];
|
||||
memcpy(db, data.iov_base, sizeof(tree_t));
|
||||
#if !MDBX_DISABLE_VALIDATION
|
||||
const txnid_t pp_txnid = couple.outer.pg[couple.outer.top]->txnid;
|
||||
tASSERT(txn, txn->front_txnid >= pp_txnid);
|
||||
if (unlikely(db->mod_txnid > pp_txnid)) {
|
||||
ERROR("db.mod_txnid (%" PRIaTXN ") > page-txnid (%" PRIaTXN ")", db->mod_txnid, pp_txnid);
|
||||
const txnid_t maindb_leafpage_txnid = mc->pg[mc->top]->txnid;
|
||||
tASSERT(txn, txn->front_txnid >= maindb_leafpage_txnid);
|
||||
if (unlikely(db->mod_txnid > maindb_leafpage_txnid)) {
|
||||
ERROR("db.mod_txnid (%" PRIaTXN ") > page-txnid (%" PRIaTXN ")", db->mod_txnid, maindb_leafpage_txnid);
|
||||
return MDBX_CORRUPTED;
|
||||
}
|
||||
#endif /* !MDBX_DISABLE_VALIDATION */
|
||||
rc = tbl_setup_ifneed(txn->env, kvx, db);
|
||||
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
int tbl_create(MDBX_txn *txn, MDBX_cursor *mc, size_t slot, const MDBX_val *name, unsigned db_flags) {
|
||||
tASSERT(txn, db_flags & MDBX_CREATE);
|
||||
MDBX_val body;
|
||||
body.iov_base = memset(&txn->dbs[slot], 0, body.iov_len = sizeof(tree_t));
|
||||
txn->dbs[slot].root = P_INVALID;
|
||||
txn->dbs[slot].mod_txnid = txn->txnid;
|
||||
txn->dbs[slot].flags = db_flags & DB_PERSISTENT_FLAGS;
|
||||
mc->next = txn->cursors[MAIN_DBI];
|
||||
txn->cursors[MAIN_DBI] = mc;
|
||||
int err = cursor_put_checklen(mc, name, &body, N_TREE | MDBX_NOOVERWRITE);
|
||||
txn->cursors[MAIN_DBI] = mc->next;
|
||||
if (likely(err == MDBX_SUCCESS)) {
|
||||
txn->flags |= MDBX_TXN_DIRTY;
|
||||
tASSERT(txn, (txn->dbi_state[MAIN_DBI] & DBI_DIRTY) != 0);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
int tbl_refresh(MDBX_txn *txn, size_t dbi) {
|
||||
cursor_couple_t couple;
|
||||
kvx_t *const kvx = &txn->env->kvs[dbi];
|
||||
int rc = tbl_fetch(txn, &couple.outer, dbi, &kvx->name, txn->dbs[dbi].flags);
|
||||
if (likely(rc != MDBX_SUCCESS))
|
||||
return dbi_gone(txn, dbi, rc);
|
||||
|
||||
rc = tbl_setup_ifneed(txn->env, kvx, &txn->dbs[dbi]);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
return dbi_gone(txn, dbi, rc);
|
||||
|
||||
if (unlikely(dbi_changed(txn, dbi)))
|
||||
return MDBX_BAD_DBI;
|
||||
|
||||
42
src/tls.c
42
src/tls.c
@@ -373,25 +373,27 @@ __cold static int rthc_drown(MDBX_env *const env) {
|
||||
int rc = MDBX_SUCCESS;
|
||||
MDBX_env *inprocess_neighbor = nullptr;
|
||||
if (likely(env->lck_mmap.lck && current_pid == env->pid)) {
|
||||
reader_slot_t *const begin = &env->lck_mmap.lck->rdt[0];
|
||||
reader_slot_t *const end = &env->lck_mmap.lck->rdt[env->max_readers];
|
||||
TRACE("== %s env %p pid %d, readers %p ...%p, current-pid %d", (current_pid == env->pid) ? "cleanup" : "skip",
|
||||
__Wpedantic_format_voidptr(env), env->pid, __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end),
|
||||
current_pid);
|
||||
bool cleaned = false;
|
||||
for (reader_slot_t *r = begin; r < end; ++r) {
|
||||
if (atomic_load32(&r->pid, mo_Relaxed) == current_pid) {
|
||||
atomic_store32(&r->pid, 0, mo_AcquireRelease);
|
||||
TRACE("== cleanup %p", __Wpedantic_format_voidptr(r));
|
||||
cleaned = true;
|
||||
}
|
||||
}
|
||||
if (cleaned)
|
||||
atomic_store32(&env->lck_mmap.lck->rdt_refresh_flag, true, mo_Relaxed);
|
||||
rc = rthc_uniq_check(&env->lck_mmap, &inprocess_neighbor);
|
||||
if (!inprocess_neighbor && env->registered_reader_pid && env->lck_mmap.fd != INVALID_HANDLE_VALUE) {
|
||||
int err = lck_rpid_clear(env);
|
||||
rc = rc ? rc : err;
|
||||
if (!inprocess_neighbor) {
|
||||
reader_slot_t *const begin = &env->lck_mmap.lck->rdt[0];
|
||||
reader_slot_t *const end = &env->lck_mmap.lck->rdt[env->max_readers];
|
||||
TRACE("== %s env %p pid %d, readers %p ...%p, current-pid %d", (current_pid == env->pid) ? "cleanup" : "skip",
|
||||
__Wpedantic_format_voidptr(env), env->pid, __Wpedantic_format_voidptr(begin),
|
||||
__Wpedantic_format_voidptr(end), current_pid);
|
||||
bool cleaned = false;
|
||||
for (reader_slot_t *r = begin; r < end; ++r) {
|
||||
if (atomic_load32(&r->pid, mo_Relaxed) == current_pid) {
|
||||
atomic_store32(&r->pid, 0, mo_AcquireRelease);
|
||||
TRACE("== cleanup %p", __Wpedantic_format_voidptr(r));
|
||||
cleaned = true;
|
||||
}
|
||||
}
|
||||
if (cleaned)
|
||||
atomic_store32(&env->lck_mmap.lck->rdt_refresh_flag, true, mo_Relaxed);
|
||||
if (env->registered_reader_pid && env->lck_mmap.fd != INVALID_HANDLE_VALUE) {
|
||||
int err = lck_rpid_clear(env);
|
||||
rc = rc ? rc : err;
|
||||
}
|
||||
}
|
||||
}
|
||||
int err = lck_destroy(env, inprocess_neighbor, current_pid);
|
||||
@@ -513,7 +515,9 @@ __cold void rthc_dtor(const uint32_t current_pid) {
|
||||
MDBX_env *const env = rthc_table[i].env;
|
||||
if (env->pid != current_pid)
|
||||
continue;
|
||||
if (!(env->flags & ENV_TXKEY))
|
||||
if (!env->lck_mmap.lck || env->lck_mmap.base == MAP_FAILED)
|
||||
continue;
|
||||
if (!(env->flags & ENV_TXKEY) || !env->lck_mmap.lck)
|
||||
continue;
|
||||
env->flags -= ENV_TXKEY;
|
||||
reader_slot_t *const begin = &env->lck_mmap.lck->rdt[0];
|
||||
|
||||
@@ -414,7 +414,6 @@ int main(int argc, char *argv[]) {
|
||||
if (argc < 2)
|
||||
usage(prog);
|
||||
|
||||
double elapsed;
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
uint64_t timestamp_start, timestamp_finish;
|
||||
timestamp_start = GetMilliseconds();
|
||||
@@ -652,23 +651,26 @@ bailout:
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
timestamp_finish = GetMilliseconds();
|
||||
elapsed = (timestamp_finish - timestamp_start) * 1e-3;
|
||||
const uint64_t elapsed_msec = (timestamp_finish - timestamp_start);
|
||||
#else
|
||||
if (clock_gettime(CLOCK_MONOTONIC, &timestamp_finish)) {
|
||||
error_fn("clock_gettime", errno);
|
||||
return EXIT_FAILURE_SYS;
|
||||
}
|
||||
elapsed =
|
||||
timestamp_finish.tv_sec - timestamp_start.tv_sec + (timestamp_finish.tv_nsec - timestamp_start.tv_nsec) * 1e-9;
|
||||
const uint64_t elapsed_msec = UINT64_C(1000) * (timestamp_finish.tv_sec - timestamp_start.tv_sec) +
|
||||
(timestamp_finish.tv_nsec - timestamp_start.tv_nsec) / 1000000;
|
||||
#endif /* !WINDOWS */
|
||||
|
||||
const size_t elapsed_seconds = (size_t)(elapsed_msec / 1000u);
|
||||
const size_t elapsed_mod_ms = (size_t)(elapsed_msec % 1000u);
|
||||
if (chk.result.total_problems) {
|
||||
print_ln(MDBX_chk_result, "Total %" PRIuSIZE " error%s detected, elapsed %.3f seconds.", chk.result.total_problems,
|
||||
(chk.result.total_problems > 1) ? "s are" : " is", elapsed);
|
||||
print_ln(MDBX_chk_result, "Total %" PRIuSIZE " error%s detected, elapsed %zu.%03zu seconds.",
|
||||
chk.result.total_problems, (chk.result.total_problems > 1) ? "s are" : " is", elapsed_seconds,
|
||||
elapsed_mod_ms);
|
||||
if (chk.result.problems_meta || chk.result.problems_kv || chk.result.problems_gc)
|
||||
return EXIT_FAILURE_CHECK_MAJOR;
|
||||
return EXIT_FAILURE_CHECK_MINOR;
|
||||
}
|
||||
print_ln(MDBX_chk_result, "No error is detected, elapsed %.3f seconds.", elapsed);
|
||||
print_ln(MDBX_chk_result, "No error is detected, elapsed %zu.%03zu seconds.", elapsed_seconds, elapsed_mod_ms);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -341,8 +341,8 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
print_stat(&mst);
|
||||
|
||||
pgno_t pages = 0, *iptr;
|
||||
pgno_t reclaimable = 0;
|
||||
size_t gc_pages = 0, *iptr;
|
||||
size_t gc_reclaimable = 0;
|
||||
MDBX_val key, data;
|
||||
while (MDBX_SUCCESS == (rc = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT))) {
|
||||
if (user_break) {
|
||||
@@ -352,9 +352,9 @@ int main(int argc, char *argv[]) {
|
||||
iptr = data.iov_base;
|
||||
const pgno_t number = *iptr++;
|
||||
|
||||
pages += number;
|
||||
gc_pages += number;
|
||||
if (envinfo && mei.mi_latter_reader_txnid > *(txnid_t *)key.iov_base)
|
||||
reclaimable += number;
|
||||
gc_reclaimable += number;
|
||||
|
||||
if (freinfo > 1) {
|
||||
char *bad = "";
|
||||
@@ -402,36 +402,43 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
|
||||
if (envinfo) {
|
||||
uint64_t value = mei.mi_mapsize / mei.mi_dxb_pagesize;
|
||||
double percent = value / 100.0;
|
||||
printf("Page Usage\n");
|
||||
printf(" Total: %" PRIu64 " 100%%\n", value);
|
||||
char buffer[64];
|
||||
|
||||
value = mei.mi_geo.current / mei.mi_dxb_pagesize;
|
||||
printf(" Backed: %" PRIu64 " %.1f%%\n", value, value / percent);
|
||||
puts("Page Usage");
|
||||
const size_t total_pages = mei.mi_mapsize / mei.mi_dxb_pagesize;
|
||||
printf(" Total: %" PRIuSIZE " 100%%\n", total_pages);
|
||||
|
||||
value = mei.mi_last_pgno + 1;
|
||||
printf(" Allocated: %" PRIu64 " %.1f%%\n", value, value / percent);
|
||||
const size_t backed_pages = mei.mi_geo.current / mei.mi_dxb_pagesize;
|
||||
printf(" Backed: %" PRIuSIZE " %s%%\n", backed_pages,
|
||||
mdbx_ratio2percents(backed_pages, total_pages, buffer, sizeof(buffer)));
|
||||
|
||||
value = mei.mi_mapsize / mei.mi_dxb_pagesize - (mei.mi_last_pgno + 1);
|
||||
printf(" Remained: %" PRIu64 " %.1f%%\n", value, value / percent);
|
||||
const size_t allocated_pages = mei.mi_last_pgno + 1;
|
||||
printf(" Allocated: %" PRIuSIZE " %s%%\n", allocated_pages,
|
||||
mdbx_ratio2percents(allocated_pages, total_pages, buffer, sizeof(buffer)));
|
||||
|
||||
value = mei.mi_last_pgno + 1 - pages;
|
||||
printf(" Used: %" PRIu64 " %.1f%%\n", value, value / percent);
|
||||
const size_t remained_pages = total_pages - allocated_pages;
|
||||
printf(" Remained: %" PRIuSIZE " %s%%\n", remained_pages,
|
||||
mdbx_ratio2percents(remained_pages, total_pages, buffer, sizeof(buffer)));
|
||||
|
||||
value = pages;
|
||||
printf(" GC: %" PRIu64 " %.1f%%\n", value, value / percent);
|
||||
const size_t used_pages = allocated_pages - gc_pages;
|
||||
printf(" Used: %" PRIuSIZE " %s%%\n", used_pages,
|
||||
mdbx_ratio2percents(used_pages, total_pages, buffer, sizeof(buffer)));
|
||||
|
||||
value = pages - reclaimable;
|
||||
printf(" Retained: %" PRIu64 " %.1f%%\n", value, value / percent);
|
||||
printf(" GC: %" PRIuSIZE " %s%%\n", gc_pages,
|
||||
mdbx_ratio2percents(gc_pages, total_pages, buffer, sizeof(buffer)));
|
||||
|
||||
value = reclaimable;
|
||||
printf(" Reclaimable: %" PRIu64 " %.1f%%\n", value, value / percent);
|
||||
printf(" Reclaimable: %" PRIuSIZE " %s%%\n", gc_reclaimable,
|
||||
mdbx_ratio2percents(gc_reclaimable, total_pages, buffer, sizeof(buffer)));
|
||||
|
||||
value = mei.mi_mapsize / mei.mi_dxb_pagesize - (mei.mi_last_pgno + 1) + reclaimable;
|
||||
printf(" Available: %" PRIu64 " %.1f%%\n", value, value / percent);
|
||||
const size_t gc_retained = gc_pages - gc_reclaimable;
|
||||
printf(" Retained: %" PRIuSIZE " %s%%\n", gc_retained,
|
||||
mdbx_ratio2percents(gc_retained, total_pages, buffer, sizeof(buffer)));
|
||||
|
||||
const size_t available_pages = gc_reclaimable + remained_pages;
|
||||
printf(" Available: %" PRIuSIZE " %s%%\n", available_pages,
|
||||
mdbx_ratio2percents(available_pages, total_pages, buffer, sizeof(buffer)));
|
||||
} else
|
||||
printf(" GC: %" PRIaPGNO " pages\n", pages);
|
||||
printf(" GC: %" PRIuSIZE " pages\n", gc_pages);
|
||||
}
|
||||
|
||||
rc = mdbx_dbi_open(txn, table, MDBX_DB_ACCEDE, &dbi);
|
||||
|
||||
@@ -39,7 +39,7 @@ __hot int tree_search(MDBX_cursor *mc, const MDBX_val *key, int flags) {
|
||||
|
||||
const size_t dbi = cursor_dbi(mc);
|
||||
if (unlikely(*cursor_dbi_state(mc) & DBI_STALE)) {
|
||||
err = tbl_fetch(mc->txn, dbi);
|
||||
err = tbl_refresh_absent2baddbi(mc->txn, dbi);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
@@ -168,6 +168,15 @@ int txn_ro_end(MDBX_txn *txn, unsigned mode) {
|
||||
txn->ro.slot = nullptr;
|
||||
else {
|
||||
eASSERT(env, slot->pid.weak == env->pid);
|
||||
if (unlikely(slot->pid.weak == 0)) {
|
||||
txn->flags = MDBX_TXN_RDONLY | MDBX_TXN_FINISHED | MDBX_TXN_OUSTED;
|
||||
txn->owner = 0;
|
||||
if (mode & TXN_END_FREE) {
|
||||
txn->signature = 0;
|
||||
osal_free(txn);
|
||||
}
|
||||
return LOG_IFERR(MDBX_BAD_RSLOT);
|
||||
}
|
||||
if (likely((txn->flags & MDBX_TXN_FINISHED) == 0)) {
|
||||
if (likely((txn->flags & MDBX_TXN_PARKED) == 0)) {
|
||||
ENSURE(env, txn->txnid >=
|
||||
|
||||
@@ -47,7 +47,7 @@ int txn_shadow_cursors(const MDBX_txn *parent, const size_t dbi) {
|
||||
int err = cursor_shadow(cursor, txn, dbi);
|
||||
if (unlikely(err != MDBX_SUCCESS)) {
|
||||
/* не получилось забекапить курсоры */
|
||||
txn->dbi_state[dbi] = DBI_OLDEN | DBI_LINDO | DBI_STALE;
|
||||
txn->dbi_state[dbi] = DBI_OLDEN | DBI_LINDO;
|
||||
txn->flags |= MDBX_TXN_ERROR;
|
||||
return err;
|
||||
}
|
||||
|
||||
49
src/utils.c
49
src/utils.c
@@ -41,3 +41,52 @@ MDBX_NOTHROW_CONST_FUNCTION uint64_t rrxmrrxmsx_0(uint64_t v) {
|
||||
v *= UINT64_C(0x9FB21C651E98DF25);
|
||||
return v ^ v >> 28;
|
||||
}
|
||||
|
||||
__cold char *ratio2digits(const uint64_t v, const uint64_t d, ratio2digits_buffer_t *const buffer, int precision) {
|
||||
assert(d > 0 && precision < 20);
|
||||
char *const dot = buffer->string + 21;
|
||||
uint64_t i = v / d, f = v % d, m = d;
|
||||
|
||||
char *tail = dot;
|
||||
bool carry = m - f < m / 2;
|
||||
if (precision > 0) {
|
||||
*tail = '.';
|
||||
do {
|
||||
while (unlikely(f > UINT64_MAX / 10)) {
|
||||
f >>= 1;
|
||||
m >>= 1;
|
||||
}
|
||||
f *= 10;
|
||||
assert(tail > buffer->string && tail < ARRAY_END(buffer->string) - 1);
|
||||
*++tail = '0' + (char)(f / m);
|
||||
f %= m;
|
||||
} while (--precision && tail < ARRAY_END(buffer->string) - 1);
|
||||
|
||||
carry = m - f < m / 2;
|
||||
for (char *scan = tail; carry && scan > dot; --scan)
|
||||
*scan = (carry = *scan == '9') ? '0' : *scan + 1;
|
||||
}
|
||||
assert(tail > buffer->string && tail < ARRAY_END(buffer->string) - 1);
|
||||
*++tail = '\0';
|
||||
|
||||
char *head = dot;
|
||||
i += carry;
|
||||
while (i > 9) {
|
||||
assert(head > buffer->string && head < ARRAY_END(buffer->string));
|
||||
*--head = '0' + (char)(i % 10);
|
||||
i /= 10;
|
||||
}
|
||||
assert(head > buffer->string && head < ARRAY_END(buffer->string));
|
||||
*--head = '0' + (char)i;
|
||||
|
||||
return head;
|
||||
}
|
||||
|
||||
__cold char *ratio2percent(uint64_t value, uint64_t whole, ratio2digits_buffer_t *buffer) {
|
||||
while (unlikely(value > UINT64_MAX / 100)) {
|
||||
value >>= 1;
|
||||
whole >>= 1;
|
||||
}
|
||||
const bool rough = whole >= value && (!value || value > whole / 16);
|
||||
return ratio2digits(value * 100, whole, buffer, rough ? 1 : 2);
|
||||
}
|
||||
|
||||
@@ -76,3 +76,10 @@ MDBX_MAYBE_UNUSED static inline uint64_t monotime_since_cached(uint64_t begin_ti
|
||||
}
|
||||
return cache->value - begin_timestamp;
|
||||
}
|
||||
|
||||
typedef struct ratio2digits_buffer {
|
||||
char string[1 + 20 + 1 + 19 + 1];
|
||||
} ratio2digits_buffer_t;
|
||||
|
||||
char *ratio2digits(const uint64_t v, const uint64_t d, ratio2digits_buffer_t *const buffer, int precision);
|
||||
char *ratio2percent(const uint64_t v, const uint64_t d, ratio2digits_buffer_t *const buffer);
|
||||
|
||||
@@ -8,7 +8,7 @@ DIR="$(dirname ${BASH_SOURCE[0]})"
|
||||
TEST="${DIR}/stochastic.sh --skip-make --db-upto-gb 32"
|
||||
PREFIX="/dev/shm/mdbxtest-"
|
||||
|
||||
NUMACTL="$(which numactl 2>-)"
|
||||
NUMACTL="$(which numactl 2>&-)"
|
||||
NUMALIST=()
|
||||
NUMAIDX=0
|
||||
if [ -n "${NUMACTL}" -a $(${NUMACTL} --hardware | grep 'node [0-9]\+ cpus' | wc -l) -gt 1 ]; then
|
||||
|
||||
@@ -3,10 +3,6 @@
|
||||
|
||||
#include "test.h++"
|
||||
|
||||
#if defined(_MSC_VER) && !defined(strcasecmp)
|
||||
#define strcasecmp(str, len) _stricmp(str, len)
|
||||
#endif /* _MSC_VER && strcasecmp() */
|
||||
|
||||
namespace config {
|
||||
|
||||
bool parse_option(int argc, char *const argv[], int &narg, const char *option, const char **value,
|
||||
|
||||
@@ -22,9 +22,7 @@ void testcase_jitter::check_dbi_error(int expect, const char *stage) {
|
||||
|
||||
bool testcase_jitter::run() {
|
||||
int err;
|
||||
size_t upper_limit = config.params.size_upper;
|
||||
if (upper_limit < 1)
|
||||
upper_limit = config.params.size_now * 2;
|
||||
size_t upper_limit = (config.params.size_upper < 1) ? config.params.size_now * 2 : config.params.size_upper;
|
||||
|
||||
tablename_buf buffer;
|
||||
const char *const tablename = db_tablename(buffer);
|
||||
|
||||
@@ -9,32 +9,15 @@ static std::unordered_map<unsigned, HANDLE> events;
|
||||
static HANDLE hBarrierSemaphore, hBarrierEvent;
|
||||
static HANDLE hProgressActiveEvent, hProgressPassiveEvent;
|
||||
|
||||
static int waitstatus2errcode(DWORD result) {
|
||||
switch (result) {
|
||||
case WAIT_OBJECT_0:
|
||||
return MDBX_SUCCESS;
|
||||
case WAIT_FAILED:
|
||||
return GetLastError();
|
||||
case WAIT_ABANDONED:
|
||||
return ERROR_ABANDONED_WAIT_0;
|
||||
case WAIT_IO_COMPLETION:
|
||||
return ERROR_USER_APC;
|
||||
case WAIT_TIMEOUT:
|
||||
return ERROR_TIMEOUT;
|
||||
default:
|
||||
return ERROR_UNHANDLED_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
void osal_wait4barrier(void) {
|
||||
DWORD rc = WaitForSingleObject(hBarrierSemaphore, 0);
|
||||
switch (rc) {
|
||||
default:
|
||||
failure_perror("WaitForSingleObject(BarrierSemaphore)", waitstatus2errcode(rc));
|
||||
failure_perror("WaitForSingleObject(BarrierSemaphore)", osal_waitstatus2errcode(rc));
|
||||
case WAIT_OBJECT_0:
|
||||
rc = WaitForSingleObject(hBarrierEvent, INFINITE);
|
||||
if (rc != WAIT_OBJECT_0)
|
||||
failure_perror("WaitForSingleObject(BarrierEvent)", waitstatus2errcode(rc));
|
||||
failure_perror("WaitForSingleObject(BarrierEvent)", osal_waitstatus2errcode(rc));
|
||||
break;
|
||||
case WAIT_TIMEOUT:
|
||||
if (!SetEvent(hBarrierEvent))
|
||||
@@ -95,7 +78,7 @@ void osal_broadcast(unsigned id) {
|
||||
int osal_waitfor(unsigned id) {
|
||||
log_trace("osal_waitfor: event %u", id);
|
||||
DWORD rc = WaitForSingleObject(events.at(id), INFINITE);
|
||||
return waitstatus2errcode(rc);
|
||||
return osal_waitstatus2errcode(rc);
|
||||
}
|
||||
|
||||
int osal_delay(unsigned seconds) {
|
||||
@@ -397,7 +380,7 @@ int osal_actor_poll(mdbx_pid_t &pid, unsigned timeout) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return waitstatus2errcode(rc);
|
||||
return osal_waitstatus2errcode(rc);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -418,7 +401,7 @@ void osal_udelay(size_t us) {
|
||||
if (us > threshold_us && us > 1000) {
|
||||
DWORD rc = SleepEx(unsigned(us / 1000), TRUE);
|
||||
if (rc)
|
||||
failure_perror("SleepEx()", waitstatus2errcode(rc));
|
||||
failure_perror("SleepEx()", osal_waitstatus2errcode(rc));
|
||||
us = 0;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user