Compare commits

..

80 Commits

Author SHA1 Message Date
Leonid Yuriev
eefbb7f063 mdbx: remove extra prep_backlog_data() inside update_gc().
Change-Id: Ic6250159c5abe03ed0705c7a432c5eb5b17b790e
2019-07-09 15:58:48 +03:00
Leonid Yuriev
9dc0b6a3f3 mdbx-test: partialy revert long_stochastic.sh changes.
Change-Id: I1fd4d601a91b50e82bbf8f183c968d31d671957e
2019-07-09 14:31:27 +03:00
Leonid Yuriev
4ecf78163d mdbx: fix/refine mdbx_find_largest().
Change-Id: Ib40ca0014da18793be0bdae5133806304ad42819
2019-07-09 03:27:23 +03:00
Leonid Yuriev
421f4b01e6 mdbx-test: refine internals (delays, logging).
Change-Id: Ife19e3bc1b124629891ba03fd9e1bf3b2ad3e301
2019-07-09 01:39:32 +03:00
Leonid Yuriev
53563e75bb mdbx: fix DB-shrinking race with copy-asis & readers.
Change-Id: I1e05616de03d814850a1c8ad20e83941b2d1c911
2019-07-09 00:41:04 +03:00
Leonid Yuriev
bd5078347b mdbx: refine backlog preparation inside update_gc().
Change-Id: Ib18842c2922afba794d6ab69337580bcea29bfe6
2019-07-09 00:41:04 +03:00
Leonid Yuriev
06f16464aa mdbx-test: adds more jitter while DB opening.
Change-Id: Ic0f03f4fbe064ea00f719baa11b28cc24aafcb5b
2019-07-07 21:21:54 +03:00
Leonid Yuriev
1f89c494ff mdbx: refines OFF_T_MAX if it is not defined.
Change-Id: Ic453140e01725493cad0c98e73244c747d5f490e
2019-07-07 20:04:39 +03:00
Leonid Yuriev
8606803344 mdbx: adds paranoid mvcc-checks into read transactions.
Change-Id: I7f0f05010306b02b3b1ab8478217de70ffd211a0
2019-07-07 19:51:06 +03:00
Leonid Yuriev
41de36e93d mdbx: avoids infinite copy_file_range-loop when file is unexpectedly truncated.
Change-Id: I5a542a49fa4ac9ced8ca60266a084d52d3618804
2019-07-07 18:42:14 +03:00
Leonid Yuriev
30ac62ceec mdbx-test: add coredump status.
Change-Id: I9003a891744eb78d91a3e279e20efcb4c1ad736c
2019-07-07 02:33:39 +03:00
Leonid Yuriev
7e1e142104 mdbx-chk: fix/refine error handling for sub-DBs.
Change-Id: I42460793bbe47815add1b3f61f3746f671a749d9
2019-07-07 01:47:10 +03:00
Leonid Yuriev
e2f37908b9 mdbx: fix/refine error handling while DB-pages walking.
Change-Id: If910e96cc7c30577531aab24b5a9573e5fe9126d
2019-07-06 21:25:11 +03:00
Leonid Yuriev
6d4e151ba8 mdbx-test: fix comment typo (minor).
Change-Id: I7bd25ccfc52371c78f9011ccd45203e44e619142
2019-07-02 00:21:51 +03:00
Leonid Yuriev
40112ebd62 mdbx: engage copy_file_range() for env_copy_as_is.
Change-Id: I7ea17914d80500ffa70451b80920d726f0e9c2f8
2019-07-01 18:37:36 +03:00
Leonid Yuriev
6960c45e59 mdbx: avoids EFAULT "Bad address" while copy-as-is if DB is swapped-out from RAM.
Change-Id: I711efc1c54a04745bd561bc5e1db5e6f6d8b7115
2019-07-01 17:55:58 +03:00
Leonid Yuriev
2e60256978 mdbx: drop EPIPE/SIGPIPE handling (since pipes are disallowed).
Change-Id: I56d4539333edea93cc1a2c3606cf959c82b98b19
2019-07-01 16:39:18 +03:00
Leonid Yuriev
38110579ba mdbx: drops mdbx_write(), using mdbx_pwrite() instead of.
Change-Id: Iff3de2d5ef3fa2e92607d46b96d4526e464e593b
2019-07-01 16:39:18 +03:00
Leonid Yuriev
08c334c8bc mdbx-test: limit DB-size 3Gb for long_stochastic.
Change-Id: I52bc6cd4aea7a41d5b1bdabc2c8e4bd6e34a78a9
2019-06-27 10:53:35 +03:00
Leonid Yuriev
aaf49bb816 mdbx-test: don't fail on key-space overflow.
Change-Id: I22a8cb359849c4c02cd393047cb7ea33974607fd
2019-06-27 10:40:00 +03:00
Leonid Yuriev
fa3adb759a mdbx-test: reduce upper txn-volume inside long_stochastic. 2019-06-26 10:11:24 +03:00
Leonid Yuriev
834f6d0784 mdbx-test: fix iteration count.
Change-Id: Iaf19af417e54ee4ad968722c94d377dab29be149
2019-06-25 15:48:03 +03:00
Leonid Yuriev
41d8f65e1e mdbx-test: biggest case depending on the DB size inside long_stochastic.
Change-Id: Ia8c09dd6b4240d76de1356aa1eecaa884636086a
2019-06-25 14:26:57 +03:00
Leonid Yuriev
c9c985ae5d mdbx-test: more for --ignore-dbfull option.
Change-Id: I92c284edd889455eefefec12d3315b6f5d37cdc5
2019-06-24 02:55:26 +03:00
Leonid Yuriev
da99dcdb87 mdbx-test: long_stochastic.
Change-Id: I28248a8af9041dfa62388a3b4ded7e2a4fdc07a9
2019-06-24 02:14:50 +03:00
Leonid Yuriev
5b88fe819c mdbx: fix pwrite() for WRITE_MAX.
Change-Id: If4924d20c1e267c2d3a190c860b89fc2fda0d517
2019-06-24 02:14:50 +03:00
Leonid Yuriev
f627b33379 mdbx-test: fix dbsize-options handling.
Change-Id: Ia51f802ac1ad4e8b1b059a3f3b38214bda6b43fc
2019-06-24 02:14:50 +03:00
Leonid Yuriev
cf004dddbc mdbx: use single cursor instance inside mdbx_env_walk().
Change-Id: I72cade64468a42fd27ebb4955d71ecbbabe64987
2019-06-24 02:14:50 +03:00
Leonid Yuriev
2d5a3ebd8f mdbx-test: add --ignore-dbfull option (major).
Change-Id: I252f9c3679a371722a780913ba994ca3dee9b90a
2019-06-24 02:14:50 +03:00
Leonid Yuriev
728f98d3de mdbx-check: refine leaf-pages info (cosmetics).
Change-Id: I0fdb467f1c1d51bfcdcef5edfe99c8e9ad66037e
2019-06-23 14:26:11 +03:00
Leonid Yuriev
bbf8ef0a4b mdbx-chk: fix space-usage statistics info.
Change-Id: I0cbbbc481f2e6dc37b29f6603ec1ead43b5d1864
2019-06-23 14:07:13 +03:00
Leonid Yuriev
05cf301774 mdbx: bump version to v0.3.0
Change-Id: Ic09361eda834c75d4fdb37dcbe5e8edbf0317f9b
2019-06-22 22:23:25 +03:00
Leonid Yuriev
47beba1782 mdbx: more 'unlikely'.
Change-Id: I472e4a922590cd4680a48416611cfd894fa120db
2019-06-22 22:23:17 +03:00
Leonid Yuriev
e3f8dc5501 mdbx: rework RECLAIMING inside update_gc().
Change-Id: I9cf592476780bfdb346472baa12497d68a3d5aad
2019-06-22 22:23:17 +03:00
Leonid Yuriev
15403aadad mdbx-test: re-seed keygen over iterations.
Change-Id: I2cfd635fc46c808dd8431217b75a30780e0c3958
2019-06-22 22:23:17 +03:00
Leonid Yuriev
e6ad443178 mdbx-test: refine 'ttl' testcase.
Change-Id: Ic4d759cfa29496bd46fa50fef1e974847b52bb41
2019-06-22 13:16:54 +03:00
Leonid Yuriev
243b01dd63 mdbx-test: refine 'append' testcast (minor).
Change-Id: I79ea16046713a085e62e01eeb0978fc4e6766750
2019-06-22 13:16:54 +03:00
Leonid Yuriev
3fc610f860 mdbx-test: use common keygen-seed for ttl testcase.
Change-Id: I921fff0ee28df8a18b6a38801c275de3fa2563ab
2019-06-22 13:16:54 +03:00
Leonid Yuriev
bfa9fc25d6 mdbx-test: 5-repeats inside gc-test script. 2019-06-22 13:16:54 +03:00
Leonid Yuriev
2219802bca mdbx-test: more for ttl testcase.
Change-Id: I8a01963345a2e815ebb39a98939420b8edb53968
2019-06-22 02:00:52 +03:00
Leonid Yuriev
be0ec1d38d mdbx: fix GC corruption due deep recursive rebalance from update_gc().
Change-Id: I810250deb25cd625e737000282b434e3158ef8cc
2019-06-22 02:00:47 +03:00
Leonid Yuriev
9cf9d6eac2 mdbx-test: add ttl testcase.
Change-Id: Ia5d164fde250e959226a53c63fcaf024ffe965a2
2019-06-22 00:53:52 +03:00
Leonid Yuriev
eecec74e21 mdbx: more unlikely (minor).
Change-Id: Id6139473b3e6a7c3f099acc64db3180448294d0a
2019-06-21 02:11:28 +03:00
Leonid Yuriev
52bc4a7f41 mdbx-check: minor refine to avoid deeply recursion.
Change-Id: I67f83a232ef47899f43c242b7e6295de4d7ec909
2019-06-20 21:08:31 +03:00
Leonid Yuriev
49d0e872a1 mdbx: iterate & check before recursion inside mdbx_env_pgwalk().
Change-Id: I27058a33f6dece0c3f206283a42ff74e5727417f
2019-06-20 21:04:47 +03:00
Leonid Yuriev
c91cc85c1f mdbx-chk: log sub-DBs when verbose > 0 (cosmetic).
Change-Id: Ie73c0773929b51eb11fb02afe18bb01b59fb2612
2019-06-20 10:12:26 +03:00
Leonid Yuriev
86cfd86cda mdbx-test: support for repeat parameter.
Change-Id: I6de52cd21314935c123ac51537e1b893c39dd5ed
2019-06-20 02:41:22 +03:00
Leonid Yuriev
cd75c4f081 mdbx-chk: avoid continuing to check bad records.
Change-Id: I03b9d425c8413d6cacc1b67ed4a8253a10a9d603
2019-06-20 00:52:00 +03:00
Leonid Yuriev
2bea60a1a4 mdbx-chk: avoid infinite loop/recursion while checking corrupted DB.
Change-Id: I3edb053e4baedced8ce8e8cfa25f9851eaca35d1
2019-06-19 15:08:50 +03:00
Leonid Yuriev
c05702eacf mdbx: add MDBX_PGWALK-tags and refine pgwalk internals.
Change-Id: I1f4eb79463dc6eec3d94d43baab0b28ceefa8c03
2019-06-19 15:05:10 +03:00
Leonid Yuriev
ce0e5d67f5 mdbx-tools: avoid output NaN from mdbx_chk for empty tables.
Change-Id: Ie1ff87da3a5e5e124eac1dafd7d5b456f8bde6e3
2019-06-10 13:28:31 +03:00
Leonid Yuriev
48655b41fb mdbx-ci: switch to Xenial.
Change-Id: Ibb4e397d1d405add92c6252fd31080197efeb9a2
2019-05-28 21:23:49 +03:00
Leonid Yuriev
b443477869 mdbx: workaround for Coverity Scan.
Change-Id: I0e2d22bbbd38ac7978fb8879219ded79a5be1b0a
2019-05-28 21:19:54 +03:00
Leonid Yuriev
870c2a6f9c mdbx: symmetrical/invariant mdbx_estimate_range() for MDBX_EPSILON order.
Change-Id: Ida7e07d6429576c457bcd4d877a3c38c88dc2771
2019-05-26 00:59:35 +03:00
Leonid Yuriev
e26b7501eb mdbx: add MDBX_EPSILON support for mdbx_estimate_range().
Change-Id: I2d89a9f20bfa16c8f35a4381709bc54f86f0ff67
2019-05-25 19:10:38 +03:00
Leonid Yuriev
b4002a8484 libmdbx: fix TAGRET typo (minor).
Change-Id: Iffafbed7fdad3492aeb51f17caf8109a5b3e35c0
2019-05-02 16:46:05 +03:00
Leonid Yuriev
6e3725457d mdbx: fix minor comment typo.
Change-Id: I56a465e820a49d13c49fb3bd05add970b0eebb14
2019-03-14 00:13:13 +03:00
Leo Yuriev
73f8839a97 mdbx: minor refine/clarify estimation internals. 2019-03-06 16:45:49 +03:00
Leo Yuriev
501eb8c6ad mdbx: more __hot/__cold attributes for functions. 2019-03-06 16:45:01 +03:00
Leonid Yuriev
7f8cd66e11 mdbx: add notes about range query estimation into READMEs.
Change-Id: Ia9e0b7e393082115839483ea7a3b37fb37ba0308
2019-03-06 00:40:53 +03:00
Leo Yuriev
ee899a21ed mdbx: treat pagesize == 0/INTPTR_MAX as aliases for MIN_PAGESIZE/MAX_PAGESIZE. 2019-03-05 17:55:33 +03:00
Leo Yuriev
3535e7a6d6 mdbx: returns as-is (i.e. negative) the estimation results for an inverted ranges. 2019-03-05 15:50:45 +03:00
Leonid Yuriev
8ddfd1f34a mdbx: adds functions for distance/move/range estimation (initial).
Change-Id: If59eccf7311123ab6384c4b93f9b1fed5a0a10d1
2019-03-05 02:57:15 +03:00
Leo Yuriev
7d383350e8 mdbx: workaround for musl-libc __assert_fail() prototype bug. 2019-03-04 14:41:50 +03:00
Leo Yuriev
9ffd17d58b mdbx: refine mdbx_filesync() to avoid hide fdatasync() error. 2019-03-04 13:53:05 +03:00
Leo Yuriev
9f410597df mdbx: checking only _POSIX_SYNCHRONIZED_IO for fdatasync (musl). 2019-03-04 13:39:33 +03:00
Leo Yuriev
cca2c91058 mdbx: don't check __GLIBC_PREREQ/_BSD_SOURCE/_XOPEN_SOURCE for use fsync (musl). 2019-03-04 13:34:38 +03:00
Leo Yuriev
46b551e386 mdbx: add in-source definition for _POSIX_C_SOURCE and _XOPEN_SOURCE (musl). 2019-03-04 13:33:27 +03:00
Leonid Yuriev
14ae9fb2a1 mdbx: env_set_geometry() treat zero-values also as defaults.
Change-Id: If8c6f7d7bbeffe71ae4e28f27184103dd1da257b
2019-03-02 14:26:09 +03:00
Leonid Yuriev
01797cf1bc mdbx: env_open() consider zero mode_t as open-existing flag.
Change-Id: I6f9dbf2059822afaba4c3de8f4ce380613a7dc36
2019-03-02 13:31:08 +03:00
Leonid Yuriev
7617cce0c6 mdbx-tools: fix Coverity warning (paranoia).
Change-Id: I3ff33a9eb2c58fe601566fd4101f9c95d76d29de
2019-02-27 23:26:15 +03:00
Leo Yuriev
ac6d423451 mdbx: fix one more comment typo (minor). 2019-02-13 20:23:43 +03:00
Leo Yuriev
44a067283a mdbx: fix comment typo (minor). 2019-02-12 13:39:16 +03:00
Leonid Yuriev
49fa9b9c35 mdbx: fix handling MDBX_APPENDDUP mode.
Change-Id: I36de2a8dcab5126dab3857a7840ab3904a1d19c8
2019-02-04 01:41:11 +03:00
Leonid Yuriev
0639f54280 mdbx-test: add 'append' testcase.
Change-Id: I71620ea1a019e16b8e3d84a81dcc042961eae5b5
2019-02-04 01:41:11 +03:00
Leonid Yuriev
73bef80347 mdbx-check: add checking for complete duplicates.
Change-Id: I8308b725418ef69188eeadfc656dead4ce9cee27
2019-02-04 01:32:15 +03:00
Leonid Yuriev
460751bc01 mdbx-chk: add '-i' option for custom comparators.
Change-Id: Ie3d7fdb3c3a881a484d351ca9a3160eb467b43b9
2019-02-04 01:32:15 +03:00
Leo Yuriev
309955be75 mdbx-load: add '-a' option for loading dumps of custom-sorted DBs.
Based on http://www.openldap.org/devel/gitweb.cgi?p=openldap.git;a=commitdiff;h=aa77c832b8e6fc696078017f550d119cdfc0f232

Change-Id: If7de71c8f6ffc29d4316c6074995fab38f2c1b4b

+load

Change-Id: Iff6cbca2514840ee290f801e3b273edf160913b4
2019-02-04 00:06:39 +03:00
Leo Yuriev
9ba8434c1d mdbx: fix __ANDROID__ typo.
Thank to Howard Chu <hyc@openldap.org>.

Change-Id: Ibcbe2e4790a5df5758d9fd6c621793ea42a94682
2019-02-03 17:53:00 +03:00
Howard Chu
bfffaa66b8 mdbx: import - tweak mdb_page_split (ITS#8969).
Bump up number of keys for which we use fine-grained splitpoint search

Change-Id: Icca2e1953cbcd6898b790f657636c2195b397790
2019-02-03 13:07:17 +03:00
36 changed files with 1960 additions and 657 deletions

View File

@@ -1,6 +1,5 @@
language: c
sudo: required
dist: trusty
dist: xenial
compiler:
- gcc
@@ -26,5 +25,5 @@ addons:
description: "Build submitted via Travis CI"
notification_email: leo@yuriev.ru
build_command_prepend: "make clean"
build_command: "make all -j 4"
build_command: "make all -j 2"
branch_pattern: coverity_scan

View File

@@ -82,11 +82,11 @@ clean:
rm -rf $(TOOLS) mdbx_test @* *.[ao] *.[ls]o *~ tmp.db/* *.gcov *.log *.err src/*.o test/*.o
check: all
rm -f $(TESTDB) $(TESTLOG) && (set -o pipefail; ./mdbx_test --pathname=$(TESTDB) --dont-cleanup-after basic | tee -a $(TESTLOG) | tail -n 42) \
rm -f $(TESTDB) $(TESTLOG) && (set -o pipefail; ./mdbx_test --repeat=42 --pathname=$(TESTDB) --dont-cleanup-after basic | tee -a $(TESTLOG) | tail -n 42) \
&& ./mdbx_chk -vvn $(TESTDB) && ./mdbx_chk -vvn $(TESTDB)-copy
check-singleprocess: all
rm -f $(TESTDB) $(TESTLOG) && (set -o pipefail; ./mdbx_test --pathname=$(TESTDB) --dont-cleanup-after --hill --copy | tee -a $(TESTLOG) | tail -n 42) \
rm -f $(TESTDB) $(TESTLOG) && (set -o pipefail; ./mdbx_test --repeat=42 --pathname=$(TESTDB) --dont-cleanup-after --hill --copy | tee -a $(TESTLOG) | tail -n 42) \
&& ./mdbx_chk -vvn $(TESTDB) && ./mdbx_chk -vvn $(TESTDB)-copy
check-fault: all

View File

@@ -171,31 +171,18 @@ Amplification Factor) и RAF (Read Amplification Factor) также Olog(N).
Доработки и усовершенствования относительно LMDB
================================================
1. Утилита `mdbx_chk` для проверки целостности структуры БД.
2. Автоматическое динамическое управление размером БД согласно
1. Автоматическое динамическое управление размером БД согласно
параметрам задаваемым функцией `mdbx_env_set_geometry()`, включая шаг
приращения и порог уменьшения размера БД, а также выбор размера
страницы. Соответственно, это позволяет снизить фрагментированность
файла БД на диске и освободить место, в том числе в **Windows**.
3. Автоматическая без-затратная компактификация БД путем возврата
2. Автоматическая без-затратная компактификация БД путем возврата
освобождающихся страниц в область нераспределенного резерва в конце
файла данных. При этом уменьшается количество страниц находящихся в
памяти и участвующих в обмене с диском.
4. Поддержка ключей и значений нулевой длины, включая сортированные
дубликаты.
5. Возможность связать с каждой завершаемой транзакцией до 3
дополнительных маркеров посредством `mdbx_canary_put()`, и прочитать их
в транзакции чтения посредством `mdbx_canary_get()`.
6. Возможность посредством `mdbx_replace()` обновить или удалить запись
с получением предыдущего значения данных, а также адресно изменить
конкретное multi-значение.
7. Режим `LIFO RECLAIM`.
3. Режим `LIFO RECLAIM`.
Для повторного использования выбираются не самые старые, а
самые новые страницы из доступных. За счет этого цикл
@@ -209,9 +196,27 @@ Amplification Factor) и RAF (Read Amplification Factor) также Olog(N).
многократное увеличение производительности по записи
(обновлению данных).
8. Генерация последовательностей посредством `mdbx_dbi_sequence()`.
4. Быстрая оценка количества элементов попадающих в запрашиваемый
диапазон значений ключа посредством функций `mdbx_estimate_range()`,
`mdbx_estimate_move()` и `mdbx_estimate_distance()` для выбора
оптимального плана выполнения запроса.
9. Обработчик `OOM-KICK`.
5. Утилита `mdbx_chk` для проверки целостности структуры БД.
6. Поддержка ключей и значений нулевой длины, включая сортированные
дубликаты.
7. Возможность связать с каждой завершаемой транзакцией до 3
дополнительных маркеров посредством `mdbx_canary_put()`, и прочитать их
в транзакции чтения посредством `mdbx_canary_get()`.
8. Возможность посредством `mdbx_replace()` обновить или удалить запись
с получением предыдущего значения данных, а также адресно изменить
конкретное multi-значение.
9. Генерация последовательностей посредством `mdbx_dbi_sequence()`.
10. Обработчик `OOM-KICK`.
Посредством `mdbx_env_set_oomfunc()` может быть установлен
внешний обработчик (callback), который будет вызван при
@@ -232,83 +237,83 @@ Amplification Factor) и RAF (Read Amplification Factor) также Olog(N).
* прервать текущую операцию изменения данных с возвратом кода
ошибки.
10. Возможность открыть БД в эксклюзивном режиме посредством флага
`MDBX_EXCLUSIVE`.
11. Возможность открыть БД в эксклюзивном режиме посредством флага
`MDBX_EXCLUSIVE`, в том числе на сетевом носителе.
11. Возможность получить отставание текущей транзакции чтения от
12. Возможность получить отставание текущей транзакции чтения от
последней версии данных в БД посредством `mdbx_txn_straggler()`.
12. Возможность явно запросить обновление существующей записи, без
13. Возможность явно запросить обновление существующей записи, без
создания новой посредством флажка `MDBX_CURRENT` для `mdbx_put()`.
13. Исправленный вариант `mdbx_cursor_count()`, возвращающий корректное
14. Исправленный вариант `mdbx_cursor_count()`, возвращающий корректное
количество дубликатов для всех типов таблиц и любого положения курсора.
14. Возможность получить посредством `mdbx_env_info()` дополнительную
15. Возможность получить посредством `mdbx_env_info()` дополнительную
информацию, включая номер самой старой версии БД (снимка данных),
который используется одним из читателей.
15. Функция `mdbx_del()` не игнорирует дополнительный (уточняющий)
16. Функция `mdbx_del()` не игнорирует дополнительный (уточняющий)
аргумент `data` для таблиц без дубликатов (без флажка `MDBX_DUPSORT`), а
при его ненулевом значении всегда использует его для сверки с удаляемой
записью.
16. Возможность открыть dbi-таблицу, одновременно с установкой
17. Возможность открыть dbi-таблицу, одновременно с установкой
компараторов для ключей и данных, посредством `mdbx_dbi_open_ex()`.
17. Возможность посредством `mdbx_is_dirty()` определить находятся ли
18. Возможность посредством `mdbx_is_dirty()` определить находятся ли
некоторый ключ или данные в "грязной" странице БД. Таким образом,
избегая лишнего копирования данных перед выполнением модифицирующих
операций (значения, размещенные в "грязных" страницах, могут быть
перезаписаны при изменениях, иначе они будут неизменны).
18. Корректное обновление текущей записи, в том числе сортированного
19. Корректное обновление текущей записи, в том числе сортированного
дубликата, при использовании режима `MDBX_CURRENT` в
`mdbx_cursor_put()`.
19. Возможность узнать есть ли за текущей позицией курсора строка данных
20. Возможность узнать есть ли за текущей позицией курсора строка данных
посредством `mdbx_cursor_eof()`.
20. Дополнительный код ошибки `MDBX_EMULTIVAL`, который возвращается из
21. Дополнительный код ошибки `MDBX_EMULTIVAL`, который возвращается из
`mdbx_put()` и `mdbx_replace()` при попытке выполнить неоднозначное
обновление или удаления одного из нескольких значений с одним ключом.
21. Возможность посредством `mdbx_get_ex()` получить значение по
22. Возможность посредством `mdbx_get_ex()` получить значение по
заданному ключу, одновременно с количеством дубликатов.
22. Наличие функций `mdbx_cursor_on_first()` и `mdbx_cursor_on_last()`,
23. Наличие функций `mdbx_cursor_on_first()` и `mdbx_cursor_on_last()`,
которые позволяют быстро выяснить стоит ли курсор на первой/последней
позиции.
23. Возможность автоматического формирования контрольных точек (сброса
24. Возможность автоматического формирования контрольных точек (сброса
данных на диск) при накоплении заданного объёма изменений,
устанавливаемого функцией `mdbx_env_set_syncbytes()`.
24. Управление отладкой и получение отладочных сообщений посредством
25. Управление отладкой и получение отладочных сообщений посредством
`mdbx_setup_debug()`.
25. Функция `mdbx_env_pgwalk()` для обхода всех страниц БД.
26. Функция `mdbx_env_pgwalk()` для обхода всех страниц БД.
26. Три мета-страницы вместо двух, что позволяет гарантированно
27. Три мета-страницы вместо двух, что позволяет гарантированно
консистентно обновлять слабые контрольные точки фиксации без риска
повредить крайнюю сильную точку фиксации.
27. Гарантия сохранности БД в режиме `WRITEMAP+MAPSYNC`.
28. Гарантия сохранности БД в режиме `WRITEMAP+MAPSYNC`.
> В текущей версии _libmdbx_ вам предоставляется выбор между безопасным
> режимом (по умолчанию) асинхронной фиксации, и режимом `UTTERLY_NOSYNC`
> когда при системной аварии есть шанс полного разрушения БД как в LMDB.
> Для подробностей смотрите раздел
> [Сохранность данных в режиме асинхронной фиксации](#Сохранность-данных-в-режиме-асинхронной-фиксации).
28. Возможность закрыть БД в "грязном" состоянии (без сброса данных и
29. Возможность закрыть БД в "грязном" состоянии (без сброса данных и
формирования сильной точки фиксации) посредством `mdbx_env_close_ex()`.
29. При завершении читающих транзакций, открытые в них DBI-хендлы не
30. При завершении читающих транзакций, открытые в них DBI-хендлы не
закрываются и не теряются при завершении таких транзакций посредством
`mdbx_txn_abort()` или `mdbx_txn_reset()`. Что позволяет избавиться от ряда
сложно обнаруживаемых ошибок.
30. Все курсоры, как в транзакциях только для чтения, так и в пишущих,
31. Все курсоры, как в транзакциях только для чтения, так и в пишущих,
могут быть переиспользованы посредством `mdbx_cursor_renew()` и ДОЛЖНЫ
ОСВОБОЖДАТЬСЯ ЯВНО.
>
@@ -682,11 +687,11 @@ $ objdump -f -h -j .text libmdbx.so
libmdbx.so: file format elf64-x86-64
architecture: i386:x86-64, flags 0x00000150:
HAS_SYMS, DYNAMIC, D_PAGED
start address 0x000030e0
start address 0x0000000000003870
Sections:
Idx Name Size VMA LMA File off Algn
11 .text 00014d84 00000000000030e0 00000000000030e0 000030e0 2**4
11 .text 000173d4 0000000000003870 0000000000003870 00003870 2**4
CONTENTS, ALLOC, LOAD, READONLY, CODE
```

View File

@@ -170,30 +170,18 @@ regular maintenance. Backups can be made on the fly on working DB
Improvements over LMDB
======================
1. `mdbx_chk` tool for DB integrity check.
2. Automatic dynamic DB size management according to the parameters
specified by `mdbx_env_set_geometry()` function. Including including
1. Automatic dynamic DB size management according to the parameters
specified by `mdbx_env_set_geometry()` function. Including
growth step and truncation threshold, as well as the choice of page
size.
3. Automatic returning of freed pages into unallocated space at the end
of database file with optionally automatic shrinking it. This reduces
2. Automatic returning of freed pages into unallocated space at the end
of database file, with optionally automatic shrinking it. This reduces
the number of pages residing in RAM and circulating in disk I/O. In fact
_libmdbx_ constantly performs DB compactification, without spending
additional resources for that.
4. Support for keys and values of zero length, including sorted
duplicates.
5. Ability to assign up to 3 markers to commiting transaction with
`mdbx_canary_put()` and then get them in read transaction by
`mdbx_canary_get()`.
6. Ability to update or delete record and get previous value via
`mdbx_replace()` Also can update specific multi-value.
7. `LIFO RECLAIM` mode:
3. `LIFO RECLAIM` mode:
The newest pages are picked for reuse instead of the oldest. This allows
to minimize reclaim loop and make it execution time independent of total
@@ -204,9 +192,25 @@ duplicates.
[BBWC](https://en.wikipedia.org/wiki/Disk_buffer#Write_acceleration)
this may greatly improve write performance.
8. Sequence generation via `mdbx_dbi_sequence()`.
4. Fast estimation of range query result size via functions `mdbx_estimate_range()`,
`mdbx_estimate_move()` and `mdbx_estimate_distance()`, e.g. for selecting the
optimal query execution plan.
9. `OOM-KICK` callback.
5. `mdbx_chk` tool for DB integrity check.
6. Support for keys and values of zero length, including sorted
duplicates.
7. Ability to assign up to 3 markers to committing transaction with
`mdbx_canary_put()` and then get them in read transaction by
`mdbx_canary_get()`.
8. Ability to update or delete record and get previous value via
`mdbx_replace()`. Also can update specific multi-value.
9. Sequence generation via `mdbx_dbi_sequence()`.
10. `OOM-KICK` callback.
`mdbx_env_set_oomfunc()` allows to set a callback, which will be called
in the event of DB space exhausting during long-time read transaction in
@@ -224,75 +228,75 @@ duplicates.
* abort current write transaction with returning error code.
10. Ability to open DB in exclusive mode with `MDBX_EXCLUSIVE` flag.
11. Ability to open DB in exclusive mode with `MDBX_EXCLUSIVE` flag.
11. Ability to get how far current read-only snapshot is from latest
12. Ability to get how far current read-only snapshot is from latest
version of the DB by `mdbx_txn_straggler()`.
12. Ability to explicitly request update of present record without
13. Ability to explicitly request update of present record without
creating new record. Implemented as `MDBX_CURRENT` flag for
`mdbx_put()`.
13. Fixed `mdbx_cursor_count()`, which returns correct count of
14. Fixed `mdbx_cursor_count()`, which returns correct count of
duplicates for all table types and any cursor position.
14. `mdbx_env_info()` to getting additional info, including number of
15. `mdbx_env_info()` for getting additional info, including the number of
the oldest snapshot of DB, which is used by one of the readers.
15. `mdbx_del()` doesn't ignore additional argument (specifier) `data`
16. `mdbx_del()` doesn't ignore additional argument (specifier) `data`
for tables without duplicates (without flag `MDBX_DUPSORT`), if `data`
is not null then always uses it to verify record, which is being
deleted.
16. Ability to open dbi-table with simultaneous setup of comparators for
17. Ability to open dbi-table with simultaneous setup of comparators for
keys and values, via `mdbx_dbi_open_ex()`.
17. `mdbx_is_dirty()`to find out if key or value is on dirty page, that
18. `mdbx_is_dirty()` to find out if a key or value is on a dirty page, which is
useful to avoid copy-out before updates.
18. Correct update of current record in `MDBX_CURRENT` mode of
19. Correct update of current record in `MDBX_CURRENT` mode of
`mdbx_cursor_put()`, including sorted duplicates.
19. Check if there is a row with data after current cursor position via
20. Check if there is a row with data after current cursor position via
`mdbx_cursor_eof()`.
20. Additional error code `MDBX_EMULTIVAL`, which is returned by
21. Additional error code `MDBX_EMULTIVAL`, which is returned by
`mdbx_put()` and `mdbx_replace()` in case of an ambiguous update or delete.
21. Ability to get value by key and duplicates count by `mdbx_get_ex()`.
22. Ability to get value by key and duplicates count by `mdbx_get_ex()`.
22. Functions `mdbx_cursor_on_first()` and `mdbx_cursor_on_last()`,
23. Functions `mdbx_cursor_on_first()` and `mdbx_cursor_on_last()`,
which allows to know if cursor is currently on first or last position
respectively.
23. Automatic creation of synchronization points (flush changes to
24. Automatic creation of synchronization points (flush changes to
persistent storage) when changes reach set threshold (threshold can be
set by `mdbx_env_set_syncbytes()`).
24. Control over debugging and receiving of debugging messages via
25. Control over debugging and receiving of debugging messages via
`mdbx_setup_debug()`.
25. Function `mdbx_env_pgwalk()` for page-walking all pages in DB.
26. Function `mdbx_env_pgwalk()` for page-walking all pages in DB.
26. Three meta-pages instead of two, this allows to guarantee
27. Three meta-pages instead of two; this guarantees consistent
updates of weak sync-points without the risk of corrupting the last
steady sync-point.
27. Guarantee of DB integrity in `WRITEMAP+MAPSYNC` mode:
28. Guarantee of DB integrity in `WRITEMAP+MAPSYNC` mode:
> Current _libmdbx_ gives a choice of safe async-write mode (default)
> and `UTTERLY_NOSYNC` mode which may result in full
> DB corruption during system crash as with LMDB. For details see
> [Data safety in async-write mode](#data-safety-in-async-write-mode).
28. Ability to close DB in "dirty" state (without data flush and
29. Ability to close DB in "dirty" state (without data flush and
creation of steady synchronization point) via `mdbx_env_close_ex()`.
29. If read transaction is aborted via `mdbx_txn_abort()` or
30. If read transaction is aborted via `mdbx_txn_abort()` or
`mdbx_txn_reset()` then DBI-handles, which were opened in it, aren't
closed or deleted. This allows to avoid several types of hard-to-debug
errors.
30. All cursors in all read and write transactions can be reused by
31. All cursors in all read and write transactions can be reused by
`mdbx_cursor_renew()` and MUST be freed explicitly.
> ## Caution, please pay attention!
>
@@ -594,11 +598,11 @@ $ objdump -f -h -j .text libmdbx.so
libmdbx.so: file format elf64-x86-64
architecture: i386:x86-64, flags 0x00000150:
HAS_SYMS, DYNAMIC, D_PAGED
start address 0x000030e0
start address 0x0000000000003870
Sections:
Idx Name Size VMA LMA File off Algn
11 .text 00014d84 00000000000030e0 00000000000030e0 000030e0 2**4
11 .text 000173d4 0000000000003870 0000000000003870 00003870 2**4
CONTENTS, ALLOC, LOAD, READONLY, CODE
```

View File

@@ -3,7 +3,9 @@ README-RU.md
pcrf_test/CMakeLists.txt
src/tools/CMakeLists.txt
test/CMakeLists.txt
test/append.cc
test/copy.cc
test/ttl.cc
tutorial/CMakeLists.txt
tutorial/sample-mdbx.c
AUTHORS

65
mdbx.h
View File

@@ -168,7 +168,7 @@ typedef pthread_t mdbx_tid_t;
/*--------------------------------------------------------------------------*/
#define MDBX_VERSION_MAJOR 0
#define MDBX_VERSION_MINOR 2
#define MDBX_VERSION_MINOR 3
#if defined(LIBMDBX_EXPORTS)
#define LIBMDBX_API __dll_export
@@ -1666,11 +1666,16 @@ typedef enum {
MDBX_subpage_dupfixed_leaf
} MDBX_page_type_t;
typedef int MDBX_pgvisitor_func(uint64_t pgno, unsigned number, void *ctx,
int deep, const char *dbi, size_t page_size,
MDBX_page_type_t type, size_t nentries,
size_t payload_bytes, size_t header_bytes,
size_t unused_bytes);
#define MDBX_PGWALK_MAIN ((const char *)((ptrdiff_t)0))
#define MDBX_PGWALK_GC ((const char *)((ptrdiff_t)-1))
#define MDBX_PGWALK_META ((const char *)((ptrdiff_t)-2))
typedef int
MDBX_pgvisitor_func(const uint64_t pgno, const unsigned number, void *const ctx,
const int deep, const char *const dbi,
const size_t page_size, const MDBX_page_type_t type,
const size_t nentries, const size_t payload_bytes,
const size_t header_bytes, const size_t unused_bytes);
LIBMDBX_API int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor,
void *ctx);
@@ -1695,6 +1700,54 @@ LIBMDBX_API int mdbx_cursor_on_first(MDBX_cursor *mc);
/* Returns: MDBX_RESULT_TRUE, MDBX_RESULT_FALSE or Error code. */
LIBMDBX_API int mdbx_cursor_on_last(MDBX_cursor *mc);
/* Estimates the distance between cursors as the number of elements.
* Both cursors must be initialized for the same DBI.
*
* [in] first The first cursor for estimation.
* [in] last The second cursor for estimation.
* [out] distance_items A pointer to store estimated distance value,
* i.e. *distance_items = distance(first, last).
*
* Returns A non-zero error value on failure and 0 on success. */
LIBMDBX_API int mdbx_estimate_distance(const MDBX_cursor *first,
const MDBX_cursor *last,
ptrdiff_t *distance_items);
/* Estimates the move distance, i.e. between the current cursor position and
* next position after the specified move-operation with given key and data.
* Current cursor position and state are preserved.
*
* [in] cursor Cursor for estimation.
* [in,out] key The key for a retrieved item.
* [in,out] data The data of a retrieved item.
* [in] move_op A cursor operation MDBX_cursor_op.
* [out] distance_items A pointer to store estimated move distance
* as the number of elements.
*
* Returns A non-zero error value on failure and 0 on success. */
LIBMDBX_API int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key,
MDBX_val *data, MDBX_cursor_op move_op,
ptrdiff_t *distance_items);
/* Estimates the size of a range in the number of elements.
*
* [in] txn A transaction handle returned by mdbx_txn_begin().
* [in] dbi A database handle returned by mdbx_dbi_open().
* [in] begin_key The key of range beginning or NULL for explicit FIRST.
* [in] begin_data Optional additional data to seeking among sorted
* duplicates. Only for MDBX_DUPSORT, NULL otherwise.
* [in] end_key The key of range ending or NULL for explicit LAST.
* [in] end_data Optional additional data to seeking among sorted
* duplicates. Only for MDBX_DUPSORT, NULL otherwise.
* [out] size_items A pointer to store range estimation result.
*
* Returns A non-zero error value on failure and 0 on success. */
#define MDBX_EPSILON ((MDBX_val *)((ptrdiff_t)-1))
LIBMDBX_API int mdbx_estimate_range(MDBX_txn *txn, MDBX_dbi dbi,
MDBX_val *begin_key, MDBX_val *begin_data,
MDBX_val *end_key, MDBX_val *end_data,
ptrdiff_t *size_items);
LIBMDBX_API int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key,
MDBX_val *new_data, MDBX_val *old_data,
unsigned flags);

View File

@@ -29,6 +29,12 @@
#ifndef _GNU_SOURCE
# define _GNU_SOURCE 1
#endif
#ifndef _POSIX_C_SOURCE
# define _POSIX_C_SOURCE 200112L
#endif
#ifndef _XOPEN_SOURCE
# define _XOPEN_SOURCE 500
#endif
#ifndef _FILE_OFFSET_BITS
# define _FILE_OFFSET_BITS 64
#endif
@@ -253,11 +259,14 @@ typedef struct MDBX_reader {
volatile mdbx_pid_t mr_pid;
/* The thread ID of the thread owning this txn. */
volatile mdbx_tid_t mr_tid;
/* The number of pages used in the reader's MVCC snapshot,
* i.e. the value of meta->mm_geo.next and txn->mt_next_pgno */
volatile pgno_t mr_snapshot_pages;
/* cache line alignment */
uint8_t pad[MDBX_CACHELINE_SIZE -
(sizeof(txnid_t) + sizeof(mdbx_pid_t) + sizeof(mdbx_tid_t)) %
MDBX_CACHELINE_SIZE];
uint8_t pad[MDBX_CACHELINE_SIZE - (sizeof(txnid_t) + sizeof(mdbx_pid_t) +
sizeof(mdbx_tid_t) + sizeof(pgno_t)) %
MDBX_CACHELINE_SIZE];
} MDBX_reader;
/* Information about a single database in the environment. */
@@ -1149,7 +1158,7 @@ typedef struct MDBX_node {
#define LEAF2KEY(p, i, ks) ((char *)(p) + PAGEHDRSZ + ((i) * (ks)))
/* Set the node's key into keyptr, if requested. */
#define MDBX_GET_KEY(node, keyptr) \
#define MDBX_GET_MAYNULL_KEYPTR(node, keyptr) \
do { \
if ((keyptr) != NULL) { \
(keyptr)->iov_len = NODEKSZ(node); \
@@ -1158,7 +1167,7 @@ typedef struct MDBX_node {
} while (0)
/* Set the node's key into key. */
#define MDBX_GET_KEY2(node, key) \
#define MDBX_GET_KEYVALUE(node, key) \
do { \
key.iov_len = NODEKSZ(node); \
key.iov_base = NODEKEY(node); \
@@ -1206,7 +1215,7 @@ static __inline pgno_t pgno_sub(pgno_t base, pgno_t subtrahend) {
}
static __inline void mdbx_jitter4testing(bool tiny) {
#ifndef NDEBUG
#if MDBX_DEBUG
if (MDBX_DBG_JITTER & mdbx_runtime_flags)
mdbx_osal_jitter(tiny);
#else

View File

@@ -303,7 +303,7 @@
#endif /* __flatten */
#ifndef likely
# if defined(__GNUC__) || defined(__clang__)
# if (defined(__GNUC__) || defined(__clang__)) && !defined(__COVERITY__)
# define likely(cond) __builtin_expect(!!(cond), 1)
# else
# define likely(x) (x)
@@ -311,13 +311,24 @@
#endif /* likely */
#ifndef unlikely
# if defined(__GNUC__) || defined(__clang__)
# if (defined(__GNUC__) || defined(__clang__)) && !defined(__COVERITY__)
# define unlikely(cond) __builtin_expect(!!(cond), 0)
# else
# define unlikely(x) (x)
# endif
#endif /* unlikely */
/* Workaround for Coverity Scan */
#if defined(__COVERITY__) && __GNUC_PREREQ(7, 0) && !defined(__cplusplus)
typedef float _Float32;
typedef double _Float32x;
typedef double _Float64;
typedef long double _Float64x;
typedef float _Float128 __attribute__((__mode__(__TF__)));
typedef __complex__ float __cfloat128 __attribute__ ((__mode__ (__TC__)));
typedef _Complex float __cfloat128 __attribute__ ((__mode__ (__TC__)));
#endif /* Workaround for Coverity Scan */
/* Wrapper around __func__, which is a C99 feature */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
# define mdbx_func_ __func__

View File

@@ -44,7 +44,8 @@ static __cold __attribute__((destructor)) void mdbx_global_destructor(void) {
/* lck */
#ifndef OFF_T_MAX
#define OFF_T_MAX (sizeof(off_t) > 4 ? INT64_MAX : INT32_MAX)
#define OFF_T_MAX \
((sizeof(off_t) > 4 ? INT64_MAX : INT32_MAX) & ~(size_t)0xffff)
#endif
#define LCK_WHOLE OFF_T_MAX

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,4 @@
/* https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */
/* https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */
/*
* Copyright 2015-2019 Leonid Yuriev <leo@yuriev.ru>
@@ -153,8 +153,10 @@ typedef struct _FILE_PROVIDER_EXTERNAL_INFO_V1 {
/*----------------------------------------------------------------------------*/
#ifndef _MSC_VER
/* Prototype should match libc runtime. ISO POSIX (2003) & LSB 3.1 */
#if !defined(_MSC_VER) && \
/* workaround for avoid musl libc wrong prototype */ ( \
defined(__GLIBC__) || defined(__GNU_LIBRARY__))
/* Prototype should match libc runtime. ISO POSIX (2003) & LSB 1.x-3.x */
__nothrow __noreturn void __assert_fail(const char *assertion, const char *file,
unsigned line, const char *function);
#endif /* _MSC_VER */
@@ -542,7 +544,6 @@ int mdbx_pread(mdbx_filehandle_t fd, void *buf, size_t bytes, uint64_t offset) {
if (bytes > MAX_WRITE)
return MDBX_EINVAL;
#if defined(_WIN32) || defined(_WIN64)
OVERLAPPED ov;
ov.hEvent = 0;
ov.Offset = (DWORD)offset;
@@ -567,21 +568,21 @@ int mdbx_pread(mdbx_filehandle_t fd, void *buf, size_t bytes, uint64_t offset) {
int mdbx_pwrite(mdbx_filehandle_t fd, const void *buf, size_t bytes,
uint64_t offset) {
#if defined(_WIN32) || defined(_WIN64)
if (bytes > MAX_WRITE)
return ERROR_INVALID_PARAMETER;
OVERLAPPED ov;
ov.hEvent = 0;
ov.Offset = (DWORD)offset;
ov.OffsetHigh = HIGH_DWORD(offset);
DWORD written;
if (likely(WriteFile(fd, buf, (DWORD)bytes, &written, &ov)))
return (bytes == written) ? MDBX_SUCCESS : MDBX_EIO /* ERROR_WRITE_FAULT */;
return GetLastError();
#else
while (true) {
#if defined(_WIN32) || defined(_WIN64)
OVERLAPPED ov;
ov.hEvent = 0;
ov.Offset = (DWORD)offset;
ov.OffsetHigh = HIGH_DWORD(offset);
DWORD written;
if (unlikely(!WriteFile(fd, buf,
(bytes <= MAX_WRITE) ? (DWORD)bytes : MAX_WRITE,
&written, &ov)))
return GetLastError();
if (likely(bytes == written))
return MDBX_SUCCESS;
#else
STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t),
"libmdbx requires 64-bit file I/O on 64-bit systems");
const intptr_t written =
@@ -592,15 +593,13 @@ int mdbx_pwrite(mdbx_filehandle_t fd, const void *buf, size_t bytes,
const int rc = errno;
if (rc != EINTR)
return rc;
} else if (written > 0) {
bytes -= written;
offset += written;
buf = (char *)buf + written;
} else {
return -1;
continue;
}
}
#endif
bytes -= written;
offset += written;
buf = (char *)buf + written;
}
}
int mdbx_pwritev(mdbx_filehandle_t fd, struct iovec *iov, int iovcnt,
@@ -631,84 +630,31 @@ int mdbx_pwritev(mdbx_filehandle_t fd, struct iovec *iov, int iovcnt,
#endif
}
int mdbx_write(mdbx_filehandle_t fd, const void *buf, size_t bytes) {
#ifdef SIGPIPE
sigset_t set, old;
sigemptyset(&set);
sigaddset(&set, SIGPIPE);
int rc = pthread_sigmask(SIG_BLOCK, &set, &old);
if (rc != 0)
return rc;
#endif
const char *ptr = buf;
for (;;) {
size_t chunk = (MAX_WRITE < bytes) ? MAX_WRITE : bytes;
#if defined(_WIN32) || defined(_WIN64)
DWORD written;
if (unlikely(!WriteFile(fd, ptr, (DWORD)chunk, &written, NULL)))
return GetLastError();
#else
intptr_t written = write(fd, ptr, chunk);
if (written < 0) {
int rc = errno;
#ifdef SIGPIPE
if (rc == EPIPE) {
/* Collect the pending SIGPIPE, otherwise at least OS X
* gives it to the process on thread-exit (ITS#8504). */
int tmp;
sigwait(&set, &tmp);
written = 0;
continue;
}
pthread_sigmask(SIG_SETMASK, &old, NULL);
#endif
return rc;
}
#endif
if (likely(bytes == (size_t)written)) {
#ifdef SIGPIPE
pthread_sigmask(SIG_SETMASK, &old, NULL);
#endif
return MDBX_SUCCESS;
}
ptr += written;
bytes -= written;
}
}
int mdbx_filesync(mdbx_filehandle_t fd, bool filesize_changed) {
#if defined(_WIN32) || defined(_WIN64)
(void)filesize_changed;
return FlushFileBuffers(fd) ? MDBX_SUCCESS : GetLastError();
#elif __GLIBC_PREREQ(2, 16) || _BSD_SOURCE || _XOPEN_SOURCE || \
(__GLIBC_PREREQ(2, 8) && _POSIX_C_SOURCE >= 200112L)
for (;;) {
/* LY: It is no reason to use fdatasync() here, even in case
* no such bug in a kernel. Because "no-bug" mean that a kernel
* internally do nearly the same, e.g. fdatasync() == fsync()
* when no-kernel-bug and file size was changed.
*
* So, this code is always safe and without appreciable
* performance degradation.
*
* For more info about of a corresponding fdatasync() bug
* see http://www.spinics.net/lists/linux-ext4/msg33714.html */
#if _POSIX_C_SOURCE >= 199309L || _XOPEN_SOURCE >= 500 || \
defined(_POSIX_SYNCHRONIZED_IO)
if (!filesize_changed && fdatasync(fd) == 0)
return MDBX_SUCCESS;
#else
int rc;
do {
#if defined(_POSIX_SYNCHRONIZED_IO) && _POSIX_SYNCHRONIZED_IO > 0
/* LY: This code is always safe and causes no appreciable performance
* degradation, even on a kernel with the fdatasync() bug.
*
* For more info about the corresponding fdatasync() bug
* see http://www.spinics.net/lists/linux-ext4/msg33714.html */
if (!filesize_changed) {
if (fdatasync(fd) == 0)
return MDBX_SUCCESS;
} else
#else
(void)filesize_changed;
#endif
if (fsync(fd) == 0)
if (fsync(fd) == 0)
return MDBX_SUCCESS;
int rc = errno;
if (rc != EINTR)
return rc;
}
#else
#error FIXME
rc = errno;
} while (rc == EINTR);
return rc;
#endif
}

View File

@@ -1,4 +1,4 @@
/* https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */
/* https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */
/*
* Copyright 2015-2019 Leonid Yuriev <leo@yuriev.ru>
@@ -494,7 +494,6 @@ int mdbx_pwritev(mdbx_filehandle_t fd, struct iovec *iov, int iovcnt,
int mdbx_pread(mdbx_filehandle_t fd, void *buf, size_t count, uint64_t offset);
int mdbx_pwrite(mdbx_filehandle_t fd, const void *buf, size_t count,
uint64_t offset);
int mdbx_write(mdbx_filehandle_t fd, const void *buf, size_t count);
int mdbx_thread_create(mdbx_thread_t *thread,
THREAD_RESULT(THREAD_CALL *start_routine)(void *),

View File

@@ -92,7 +92,8 @@ MDBX_envinfo envinfo;
MDBX_stat envstat;
size_t maxkeysize, userdb_count, skipped_subdb;
uint64_t reclaimable_pages, gc_pages, lastpgno, unused_pages;
unsigned verbose, quiet;
unsigned verbose;
char ignore_wrong_order, quiet;
const char *only_subdb;
struct problem {
@@ -153,6 +154,13 @@ static void pagemap_cleanup(void) {
static walk_dbi_t *pagemap_lookup_dbi(const char *dbi_name, bool silent) {
static walk_dbi_t *last;
if (dbi_name == MDBX_PGWALK_MAIN)
return &dbi_main;
if (dbi_name == MDBX_PGWALK_GC)
return &dbi_free;
if (dbi_name == MDBX_PGWALK_META)
return &dbi_meta;
if (last && strcmp(last->name, dbi_name) == 0)
return last;
@@ -165,7 +173,7 @@ static walk_dbi_t *pagemap_lookup_dbi(const char *dbi_name, bool silent) {
}
dbi->name = mdbx_strdup(dbi_name);
if (verbose > 1 && !silent) {
if (verbose > 0 && !silent) {
print(" - found '%s' area\n", dbi_name);
fflush(NULL);
}
@@ -244,36 +252,34 @@ static size_t problems_pop(struct problem *list) {
return count;
}
static int pgvisitor(uint64_t pgno, unsigned pgnumber, void *ctx, int deep,
const char *dbi_name, size_t page_size,
MDBX_page_type_t pagetype, size_t nentries,
size_t payload_bytes, size_t header_bytes,
size_t unused_bytes) {
static int pgvisitor(const uint64_t pgno, const unsigned pgnumber,
void *const ctx, const int deep,
const char *const dbi_name_or_tag, const size_t page_size,
const MDBX_page_type_t pagetype, const size_t nentries,
const size_t payload_bytes, const size_t header_bytes,
const size_t unused_bytes) {
(void)ctx;
if (deep > 42) {
problem_add("deep", deep, "too large", nullptr);
return MDBX_CORRUPTED /* avoid infinite loop/recursion */;
}
if (pagetype == MDBX_page_void)
return MDBX_SUCCESS;
walk_dbi_t fake, *dbi = &fake;
if (deep > 0) {
dbi = pagemap_lookup_dbi(dbi_name, false);
if (!dbi)
return MDBX_ENOMEM;
} else if (deep == 0 && strcmp(dbi_name, dbi_main.name) == 0)
dbi = &dbi_main;
else if (deep == -1 && strcmp(dbi_name, dbi_free.name) == 0)
dbi = &dbi_free;
else if (deep == -2 && strcmp(dbi_name, dbi_meta.name) == 0)
dbi = &dbi_meta;
else
problem_add("deep", deep, "unknown area", "%s", dbi_name);
walk_dbi_t *dbi = pagemap_lookup_dbi(dbi_name_or_tag, false);
if (!dbi)
return MDBX_ENOMEM;
const size_t page_bytes = payload_bytes + header_bytes + unused_bytes;
walk.pgcount += pgnumber;
const char *pagetype_caption;
bool branch = false;
switch (pagetype) {
default:
problem_add("page", pgno, "unknown page-type", "%u", (unsigned)pagetype);
problem_add("page", pgno, "unknown page-type", "type %u, deep %i",
(unsigned)pagetype, deep);
pagetype_caption = "unknown";
dbi->pages.other += pgnumber;
break;
@@ -289,6 +295,7 @@ static int pgvisitor(uint64_t pgno, unsigned pgnumber, void *ctx, int deep,
case MDBX_page_branch:
pagetype_caption = "branch";
dbi->pages.branch += pgnumber;
branch = true;
break;
case MDBX_page_leaf:
pagetype_caption = "leaf";
@@ -309,15 +316,39 @@ static int pgvisitor(uint64_t pgno, unsigned pgnumber, void *ctx, int deep,
}
if (pgnumber) {
if (verbose > 3 && (!only_subdb || strcmp(only_subdb, dbi_name) == 0)) {
if (verbose > 3 && (!only_subdb || strcmp(only_subdb, dbi->name) == 0)) {
if (pgnumber == 1)
print(" %s-page %" PRIu64, pagetype_caption, pgno);
else
print(" %s-span %" PRIu64 "[%u]", pagetype_caption, pgno, pgnumber);
print(" of %s: header %" PRIiPTR ", payload %" PRIiPTR
", unused %" PRIiPTR "\n",
dbi_name, header_bytes, payload_bytes, unused_bytes);
", unused %" PRIiPTR ", deep %i\n",
dbi->name, header_bytes, payload_bytes, unused_bytes, deep);
}
bool already_used = false;
for (unsigned n = 0; n < pgnumber; ++n) {
uint64_t spanpgno = pgno + n;
if (spanpgno >= lastpgno)
problem_add("page", spanpgno, "wrong page-no",
"%s-page: %" PRIu64 " > %" PRIu64 ", deep %i",
pagetype_caption, spanpgno, lastpgno, deep);
else if (walk.pagemap[spanpgno]) {
walk_dbi_t *coll_dbi = &walk.dbi[walk.pagemap[spanpgno] - 1];
problem_add("page", spanpgno,
(branch && coll_dbi == dbi) ? "loop" : "already used",
"%s-page: by %s, deep %i", pagetype_caption, coll_dbi->name,
deep);
already_used = true;
} else {
walk.pagemap[spanpgno] = (short)(dbi - walk.dbi + 1);
dbi->pages.total += 1;
}
}
if (already_used)
return branch ? MDBX_RESULT_TRUE /* avoid infinite loop/recursion */
: MDBX_SUCCESS;
}
if (unused_bytes > page_size)
@@ -342,8 +373,9 @@ static int pgvisitor(uint64_t pgno, unsigned pgnumber, void *ctx, int deep,
} */
} else {
problem_add("page", pgno, "empty",
"%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR " entries",
pagetype_caption, payload_bytes, nentries);
"%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR
" entries, deep %i",
pagetype_caption, payload_bytes, nentries, deep);
dbi->pages.empty += 1;
}
}
@@ -352,9 +384,9 @@ static int pgvisitor(uint64_t pgno, unsigned pgnumber, void *ctx, int deep,
if (page_bytes != page_size) {
problem_add("page", pgno, "misused",
"%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR
"h + %" PRIuPTR "p + %" PRIuPTR "u)",
"h + %" PRIuPTR "p + %" PRIuPTR "u), deep %i",
pagetype_caption, page_size, page_bytes, header_bytes,
payload_bytes, unused_bytes);
payload_bytes, unused_bytes, deep);
if (page_size > page_bytes)
dbi->lost_bytes += page_size - page_bytes;
} else {
@@ -363,23 +395,6 @@ static int pgvisitor(uint64_t pgno, unsigned pgnumber, void *ctx, int deep,
}
}
if (pgnumber) {
do {
if (pgno >= lastpgno)
problem_add("page", pgno, "wrong page-no",
"%s-page: %" PRIu64 " > %" PRIu64, pagetype_caption, pgno,
lastpgno);
else if (walk.pagemap[pgno])
problem_add("page", pgno, "already used", "%s-page: by %s",
pagetype_caption, walk.dbi[walk.pagemap[pgno] - 1].name);
else {
walk.pagemap[pgno] = (short)(dbi - walk.dbi + 1);
dbi->pages.total += 1;
}
++pgno;
} while (--pgnumber);
}
return user_break ? MDBX_EINTR : MDBX_SUCCESS;
}
@@ -615,6 +630,8 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
saved_list = problems_push();
prev_key.iov_base = NULL;
prev_key.iov_len = 0;
prev_data.iov_base = NULL;
prev_data.iov_len = 0;
rc = mdbx_cursor_get(mc, &key, &data, MDBX_FIRST);
while (rc == MDBX_SUCCESS) {
@@ -625,40 +642,55 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
goto bailout;
}
bool bad_key = false;
if (key.iov_len > maxkeysize) {
problem_add("entry", record_count, "key length exceeds max-key-size",
"%" PRIuPTR " > %" PRIuPTR, key.iov_len, maxkeysize);
bad_key = true;
} else if ((flags & MDBX_INTEGERKEY) && key.iov_len != sizeof(uint64_t) &&
key.iov_len != sizeof(uint32_t)) {
problem_add("entry", record_count, "wrong key length",
"%" PRIuPTR " != 4or8", key.iov_len);
bad_key = true;
}
bool bad_data = false;
if ((flags & MDBX_INTEGERDUP) && data.iov_len != sizeof(uint64_t) &&
data.iov_len != sizeof(uint32_t)) {
problem_add("entry", record_count, "wrong data length",
"%" PRIuPTR " != 4or8", data.iov_len);
bad_data = true;
}
if (prev_key.iov_base) {
if (prev_key.iov_base && !bad_data) {
if ((flags & MDBX_DUPFIXED) && prev_data.iov_len != data.iov_len) {
problem_add("entry", record_count, "different data length",
"%" PRIuPTR " != %" PRIuPTR, prev_data.iov_len,
data.iov_len);
bad_data = true;
}
int cmp = mdbx_cmp(txn, dbi_handle, &prev_key, &key);
if (cmp > 0) {
problem_add("entry", record_count, "broken ordering of entries", NULL);
} else if (cmp == 0) {
++dups;
if (!(flags & MDBX_DUPSORT))
problem_add("entry", record_count, "duplicated entries", NULL);
else if (flags & MDBX_INTEGERDUP) {
cmp = mdbx_dcmp(txn, dbi_handle, &prev_data, &data);
if (cmp > 0)
problem_add("entry", record_count,
"broken ordering of multi-values", NULL);
if (!bad_key) {
int cmp = mdbx_cmp(txn, dbi_handle, &prev_key, &key);
if (cmp == 0) {
++dups;
if ((flags & MDBX_DUPSORT) == 0) {
problem_add("entry", record_count, "duplicated entries", NULL);
if (data.iov_len == prev_data.iov_len &&
memcmp(data.iov_base, prev_data.iov_base, data.iov_len) == 0) {
problem_add("entry", record_count, "complete duplicate", NULL);
}
} else if (!bad_data) {
cmp = mdbx_dcmp(txn, dbi_handle, &prev_data, &data);
if (cmp == 0) {
problem_add("entry", record_count, "complete duplicate", NULL);
} else if (cmp > 0 && !ignore_wrong_order) {
problem_add("entry", record_count, "wrong order of multi-values",
NULL);
}
}
} else if (cmp > 0 && !ignore_wrong_order) {
problem_add("entry", record_count, "wrong order of entries", NULL);
}
}
} else if (verbose) {
@@ -670,7 +702,7 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
if (handler) {
rc = handler(record_count, &key, &data);
if (rc)
if (MDBX_IS_ERROR(rc))
goto bailout;
}
@@ -678,8 +710,10 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
key_bytes += key.iov_len;
data_bytes += data.iov_len;
prev_key = key;
prev_data = data;
if (!bad_key)
prev_key = key;
if (!bad_data)
prev_data = data;
rc = mdbx_cursor_get(mc, &key, &data, MDBX_NEXT);
}
if (rc != MDBX_NOTFOUND)
@@ -701,7 +735,7 @@ bailout:
}
mdbx_cursor_close(mc);
return rc || problems_count;
return (rc || problems_count) ? MDBX_RESULT_TRUE : MDBX_SUCCESS;
}
static void usage(char *prog) {
@@ -714,7 +748,8 @@ static void usage(char *prog) {
" -w\t\tlock DB for writing while checking\n"
" -d\t\tdisable page-by-page traversal of b-tree\n"
" -s subdb\tprocess a specific subdatabase only\n"
" -c\t\tforce cooperative mode (don't try exclusive)\n",
" -c\t\tforce cooperative mode (don't try exclusive)\n"
" -i\t\tignore wrong order errors (for custom comparators case)\n",
prog);
exit(EXIT_INTERRUPTED);
}
@@ -898,7 +933,7 @@ int main(int argc, char *argv[]) {
usage(prog);
}
for (int i; (i = getopt(argc, argv, "Vvqnwcds:")) != EOF;) {
for (int i; (i = getopt(argc, argv, "Vvqnwcdsi:")) != EOF;) {
switch (i) {
case 'V':
printf("%s (%s, build %s)\n", mdbx_version.git.describe,
@@ -928,6 +963,9 @@ int main(int argc, char *argv[]) {
usage(prog);
only_subdb = optarg;
break;
case 'i':
ignore_wrong_order = 1;
break;
default:
usage(prog);
}
@@ -1142,7 +1180,9 @@ int main(int argc, char *argv[]) {
uint64_t all_leaf = dbi->pages.leaf + dbi->pages.leaf_dupfixed;
if (all_leaf) {
print(", leaf %" PRIu64, all_leaf);
if (verbose > 2)
if (verbose > 2 &&
(dbi->pages.leaf_dupfixed | dbi->pages.subleaf_dupsort |
dbi->pages.subleaf_dupsort))
print(" (usual %" PRIu64 ", sub-dupsort %" PRIu64
", dupfixed %" PRIu64 ", sub-dupfixed %" PRIu64 ")",
dbi->pages.leaf, dbi->pages.subleaf_dupsort,
@@ -1163,20 +1203,22 @@ int main(int argc, char *argv[]) {
total_page_bytes);
if (verbose > 2) {
for (walk_dbi_t *dbi = walk.dbi; dbi < walk.dbi + MAX_DBI && dbi->name;
++dbi) {
uint64_t dbi_bytes = dbi->pages.total * envstat.ms_psize;
print(" %s: subtotal %" PRIu64 " bytes (%.1f%%),"
" payload %" PRIu64 " (%.1f%%), unused %" PRIu64 " (%.1f%%)",
dbi->name, dbi_bytes, dbi_bytes * 100.0 / total_page_bytes,
dbi->payload_bytes, dbi->payload_bytes * 100.0 / dbi_bytes,
dbi_bytes - dbi->payload_bytes,
(dbi_bytes - dbi->payload_bytes) * 100.0 / dbi_bytes);
if (dbi->pages.empty)
print(", %" PRIu64 " empty pages", dbi->pages.empty);
if (dbi->lost_bytes)
print(", %" PRIu64 " bytes lost", dbi->lost_bytes);
print("\n");
}
++dbi)
if (dbi->pages.total) {
uint64_t dbi_bytes = dbi->pages.total * envstat.ms_psize;
print(" %s: subtotal %" PRIu64 " bytes (%.1f%%),"
" payload %" PRIu64 " (%.1f%%), unused %" PRIu64 " (%.1f%%)",
dbi->name, dbi_bytes, dbi_bytes * 100.0 / total_page_bytes,
dbi->payload_bytes, dbi->payload_bytes * 100.0 / dbi_bytes,
dbi_bytes - dbi->payload_bytes,
(dbi_bytes - dbi->payload_bytes) * 100.0 / dbi_bytes);
if (dbi->pages.empty)
print(", %" PRIu64 " empty pages", dbi->pages.empty);
if (dbi->lost_bytes)
print(", %" PRIu64 " bytes lost", dbi->lost_bytes);
print("\n");
} else
print(" %s: empty\n", dbi->name);
}
print(" - summary: average fill %.1f%%",
walk.total_payload_bytes * 100.0 / total_page_bytes);

View File

@@ -39,6 +39,13 @@ option below.
.BR \-V
Write the library version number to the standard output, and exit.
.TP
.BR \-a
Append all records in the order they appear in the input. The input is assumed to already be
in correctly sorted order and no sorting or checking for redundant values will be performed.
This option must be used to reload data that was produced by running
.B mdbx_dump
on a database that uses custom compare functions.
.TP
.BR \-f \ file
Read from the specified file instead of from the standard input.
.TP

View File

@@ -1,4 +1,4 @@
/* mdbx_load.c - memory-mapped database load tool */
/* mdbx_load.c - memory-mapped database load tool */
/*
* Copyright 2015-2019 Leonid Yuriev <leo@yuriev.ru>
@@ -57,6 +57,7 @@ static int Eof;
static MDBX_envinfo envinfo;
static MDBX_val kbuf, dbuf;
static MDBX_val k0buf;
#define STRLENOF(s) (sizeof(s) - 1)
@@ -304,11 +305,18 @@ static int readline(MDBX_val *out, MDBX_val *buf) {
}
static void usage(void) {
fprintf(stderr, "usage: %s [-V] [-f input] [-n] [-s name] [-N] [-T] dbpath\n",
fprintf(stderr,
"usage: %s [-V] [-a] [-f input] [-n] [-s name] [-N] [-T] dbpath\n",
prog);
exit(EXIT_FAILURE);
}
/* Comparator stub that ignores both operands and always reports
 * "a is greater than b" by returning 1. main() passes it to
 * mdbx_dbi_open_ex() as both comparators when the -a (append) option is
 * given. */
static int anyway_greater(const MDBX_val *a, const MDBX_val *b) {
  /* Both arguments are intentionally unused; the result is constant. */
  return (void)a, (void)b, 1;
}
int main(int argc, char *argv[]) {
int i, rc;
MDBX_env *env = NULL;
@@ -316,28 +324,32 @@ int main(int argc, char *argv[]) {
MDBX_cursor *mc = NULL;
MDBX_dbi dbi;
char *envname = NULL;
int envflags = 0, putflags = 0;
int envflags = MDBX_UTTERLY_NOSYNC, putflags = 0;
int append = 0;
MDBX_val prevk;
prog = argv[0];
if (argc < 2) {
if (argc < 2)
usage();
}
/* -f: load file instead of stdin
/* -a: append records in input order
* -f: load file instead of stdin
* -n: use NOSUBDIR flag on env_open
* -s: load into named subDB
* -N: use NOOVERWRITE on puts
* -T: read plaintext
* -V: print version and exit
*/
while ((i = getopt(argc, argv, "f:ns:NTV")) != EOF) {
while ((i = getopt(argc, argv, "af:ns:NTV")) != EOF) {
switch (i) {
case 'V':
printf("%s (%s, build %s)\n", mdbx_version.git.describe,
mdbx_version.git.datetime, mdbx_build.datetime);
exit(EXIT_SUCCESS);
break;
case 'a':
append = 1;
break;
case 'f':
if (freopen(optarg, "r", stdin) == NULL) {
fprintf(stderr, "%s: %s: reopen: %s\n", prog, optarg,
@@ -381,6 +393,7 @@ int main(int argc, char *argv[]) {
dbuf.iov_len = 4096;
dbuf.iov_base = mdbx_malloc(dbuf.iov_len);
/* read first header for mapsize= */
if (!(mode & NOHDR))
readhdr();
@@ -418,8 +431,17 @@ int main(int argc, char *argv[]) {
goto env_close;
}
kbuf.iov_len = mdbx_env_get_maxkeysize(env) * 2 + 2;
kbuf.iov_base = mdbx_malloc(kbuf.iov_len);
kbuf.iov_len = mdbx_env_get_maxkeysize(env);
if (kbuf.iov_len >= SIZE_MAX / 4) {
fprintf(stderr, "mdbx_env_get_maxkeysize failed, returns %zu\n",
kbuf.iov_len);
goto env_close;
}
kbuf.iov_len = (kbuf.iov_len + 1) * 2;
kbuf.iov_base = malloc(kbuf.iov_len * 2);
k0buf.iov_len = kbuf.iov_len;
k0buf.iov_base = (char *)kbuf.iov_base + kbuf.iov_len;
prevk.iov_base = k0buf.iov_base;
while (!Eof) {
if (user_break) {
@@ -427,9 +449,6 @@ int main(int argc, char *argv[]) {
break;
}
MDBX_val key, data;
int batch = 0;
rc = mdbx_txn_begin(env, NULL, 0, &txn);
if (rc) {
fprintf(stderr, "mdbx_txn_begin failed, error %d %s\n", rc,
@@ -437,7 +456,9 @@ int main(int argc, char *argv[]) {
goto env_close;
}
rc = mdbx_dbi_open(txn, subname, dbi_flags | MDBX_CREATE, &dbi);
rc = mdbx_dbi_open_ex(txn, subname, dbi_flags | MDBX_CREATE, &dbi,
append ? anyway_greater : NULL,
append ? anyway_greater : NULL);
if (rc) {
fprintf(stderr, "mdbx_open failed, error %d %s\n", rc, mdbx_strerror(rc));
goto txn_abort;
@@ -450,11 +471,15 @@ int main(int argc, char *argv[]) {
goto txn_abort;
}
int batch = 0;
prevk.iov_len = 0;
while (1) {
MDBX_val key;
rc = readline(&key, &kbuf);
if (rc) /* rc == EOF */
break;
MDBX_val data;
rc = readline(&data, &dbuf);
if (rc) {
fprintf(stderr, "%s: line %" PRIiSIZE ": failed to read key value\n",
@@ -462,7 +487,18 @@ int main(int argc, char *argv[]) {
goto txn_abort;
}
rc = mdbx_cursor_put(mc, &key, &data, putflags);
int appflag = 0;
if (append) {
appflag = MDBX_APPEND;
if (dbi_flags & MDBX_DUPSORT) {
if (prevk.iov_len == key.iov_len &&
memcmp(prevk.iov_base, key.iov_base, key.iov_len) == 0)
appflag = MDBX_APPEND | MDBX_APPENDDUP;
else
memcpy(prevk.iov_base, key.iov_base, prevk.iov_len = key.iov_len);
}
}
rc = mdbx_cursor_put(mc, &key, &data, putflags | appflag);
if (rc == MDBX_KEYEXIST && putflags)
continue;
if (rc) {
@@ -501,6 +537,8 @@ int main(int argc, char *argv[]) {
goto env_close;
}
mdbx_dbi_close(env, dbi);
/* try read next header */
if (!(mode & NOHDR))
readhdr();
}

View File

@@ -14,11 +14,11 @@
#include "./bits.h"
#if MDBX_VERSION_MAJOR != 0 || MDBX_VERSION_MINOR != 2
#if MDBX_VERSION_MAJOR != 0 || MDBX_VERSION_MINOR != 3
#error "API version mismatch!"
#endif
#define MDBX_VERSION_RELEASE 2
#define MDBX_VERSION_RELEASE 0
#define MDBX_VERSION_REVISION 0
/*LIBMDBX_EXPORTS*/ const mdbx_version_info mdbx_version = {

View File

@@ -27,6 +27,8 @@ add_executable(${TARGET}
try.cc
utils.cc
utils.h
append.cc
ttl.cc
)
target_link_libraries(${TARGET}

165
test/append.cc Normal file
View File

@@ -0,0 +1,165 @@
/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted only as authorized by the OpenLDAP
* Public License.
*
* A copy of this license is available in the file LICENSE in the
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
bool testcase_append::run() {
MDBX_dbi dbi;
int err = db_open__begin__table_create_open_clean(dbi);
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("append: bailout-prepare due '%s'", mdbx_strerror(err));
return true;
}
keyvalue_maker.setup(config.params, config.actor_id, 0 /* thread_number */);
/* LY: тест наполнения таблиц в append-режиме,
* при котором записи добавляются строго в конец (в порядке сортировки) */
const unsigned flags = (config.params.table_flags & MDBX_DUPSORT)
? MDBX_APPEND | MDBX_APPENDDUP
: MDBX_APPEND;
keyvalue_maker.make_ordered();
key = keygen::alloc(config.params.keylen_max);
data = keygen::alloc(config.params.datalen_max);
keygen::buffer last_key = keygen::alloc(config.params.keylen_max);
keygen::buffer last_data = keygen::alloc(config.params.datalen_max);
last_key->value.iov_base = last_key->bytes;
last_key->value.iov_len = 0;
last_data->value.iov_base = last_data->bytes;
last_data->value.iov_len = 0;
simple_checksum inserted_checksum;
uint64_t inserted_number = 0;
uint64_t serial_count = 0;
unsigned txn_nops = 0;
uint64_t commited_inserted_number = inserted_number;
simple_checksum commited_inserted_checksum = inserted_checksum;
while (should_continue()) {
const keygen::serial_t serial = serial_count;
if (!keyvalue_maker.increment(serial_count, 1)) {
// дошли до границы пространства ключей
break;
}
log_trace("append: append-a %" PRIu64, serial);
generate_pair(serial, key, data);
int cmp = inserted_number ? mdbx_cmp(txn_guard.get(), dbi, &key->value,
&last_key->value)
: 1;
if (cmp == 0 && (config.params.table_flags & MDBX_DUPSORT))
cmp = mdbx_dcmp(txn_guard.get(), dbi, &data->value, &last_data->value);
err = mdbx_put(txn_guard.get(), dbi, &key->value, &data->value, flags);
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("append: bailout-insert due '%s'", mdbx_strerror(err));
txn_end(true);
inserted_number = commited_inserted_number;
inserted_checksum = commited_inserted_checksum;
break;
}
if (cmp > 0) {
if (unlikely(err != MDBX_SUCCESS))
failure_perror("mdbx_put(appenda-a)", err);
memcpy(last_key->value.iov_base, key->value.iov_base,
last_key->value.iov_len = key->value.iov_len);
memcpy(last_data->value.iov_base, data->value.iov_base,
last_data->value.iov_len = data->value.iov_len);
++inserted_number;
inserted_checksum.push((uint32_t)inserted_number, key->value);
inserted_checksum.push(10639, data->value);
} else {
if (unlikely(err != MDBX_EKEYMISMATCH))
failure_perror("mdbx_put(appenda-a) != MDBX_EKEYMISMATCH", err);
}
if (++txn_nops >= config.params.batch_write) {
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("append: bailout-commit due '%s'", mdbx_strerror(err));
inserted_number = commited_inserted_number;
inserted_checksum = commited_inserted_checksum;
break;
}
commited_inserted_number = inserted_number;
commited_inserted_checksum = inserted_checksum;
txn_nops = 0;
}
report(1);
}
if (txn_guard) {
err = breakable_commit();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("append: bailout-commit due '%s'", mdbx_strerror(err));
inserted_number = commited_inserted_number;
inserted_checksum = commited_inserted_checksum;
}
}
//----------------------------------------------------------------------------
txn_begin(true);
cursor_open(dbi);
MDBX_val check_key, check_data;
err =
mdbx_cursor_get(cursor_guard.get(), &check_key, &check_data, MDBX_FIRST);
if (likely(inserted_number)) {
if (unlikely(err != MDBX_SUCCESS))
failure_perror("mdbx_cursor_get(MDBX_FIRST)", err);
}
simple_checksum read_checksum;
uint64_t read_count = 0;
while (err == MDBX_SUCCESS) {
++read_count;
read_checksum.push((uint32_t)read_count, check_key);
read_checksum.push(10639, check_data);
err =
mdbx_cursor_get(cursor_guard.get(), &check_key, &check_data, MDBX_NEXT);
}
if (unlikely(err != MDBX_NOTFOUND))
failure_perror("mdbx_cursor_get(MDBX_NEXT) != EOF", err);
if (unlikely(read_count != inserted_number))
failure("read_count(%" PRIu64 ") != inserted_number(%" PRIu64 ")",
read_count, inserted_number);
if (unlikely(read_checksum.value != inserted_checksum.value))
failure("read_checksum(0x%016" PRIu64 ") "
"!= inserted_checksum(0x%016" PRIu64 ")",
read_checksum.value, inserted_checksum.value);
cursor_close();
txn_end(true);
//----------------------------------------------------------------------------
if (dbi) {
if (config.params.drop_table && !mode_readonly()) {
txn_begin(false);
db_table_drop(dbi);
err = breakable_commit();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("append: bailout-clean due '%s'", mdbx_strerror(err));
return true;
}
} else
db_table_close(dbi);
}
return true;
}

View File

@@ -63,12 +63,13 @@ void testcase_setup(const char *casename, actor_params &params,
log_notice(">>> testcase_setup(%s)", casename);
configure_actor(last_space_id, ac_jitter, nullptr, params);
configure_actor(last_space_id, ac_hill, nullptr, params);
configure_actor(last_space_id, ac_ttl, nullptr, params);
configure_actor(last_space_id, ac_jitter, nullptr, params);
configure_actor(last_space_id, ac_hill, nullptr, params);
configure_actor(last_space_id, ac_jitter, nullptr, params);
configure_actor(last_space_id, ac_hill, nullptr, params);
configure_actor(last_space_id, ac_ttl, nullptr, params);
configure_actor(last_space_id, ac_try, nullptr, params);
configure_actor(last_space_id, ac_copy, nullptr, params);
configure_actor(last_space_id, ac_append, nullptr, params);
log_notice("<<< testcase_setup(%s): done", casename);
} else {
failure("unknown testcase `%s`", casename);

View File

@@ -1,4 +1,4 @@
/*
/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -412,6 +412,8 @@ void dump(const char *title) {
i->params.max_tables);
log_info("drop table: %s\n", i->params.drop_table ? "Yes" : "No");
log_info("ignore MDBX_MAP_FULL error: %s\n",
i->params.ignore_dbfull ? "Yes" : "No");
indent.pop();
}

View File

@@ -27,7 +27,9 @@ enum actor_testcase {
ac_deadwrite,
ac_jitter,
ac_try,
ac_copy
ac_copy,
ac_append,
ac_ttl
};
enum actor_status {
@@ -36,7 +38,8 @@ enum actor_status {
as_running,
as_successful,
as_killed,
as_failed
as_failed,
as_coredump,
};
const char *testcase2str(const actor_testcase);
@@ -246,6 +249,7 @@ struct actor_params_pod {
keygen_params_pod keygen;
bool drop_table;
bool ignore_dbfull;
};
struct actor_config_pod {

View File

@@ -1,66 +0,0 @@
#!/bin/bash
set -euo pipefail
make check
TESTDB_PREFIX=${1:-/dev/shm/mdbx-gc-test}.
function rep9 { printf "%*s" $1 '' | tr ' ' '9'; }
function join { local IFS="$1"; shift; echo "$*"; }
function bit2option { local -n arr=$1; (( ($2&(1<<$3)) != 0 )) && echo -n '+' || echo -n '-'; echo "${arr[$3]}"; }
options=(writemap coalesce lifo)
function bits2list {
local -n arr=$1
local i
local list=()
for ((i=0; i<${#arr[@]}; ++i)) do
list[$i]=$(bit2option $1 $2 $i)
done
join , "${list[@]}"
}
function probe {
echo "=============================================== $(date)"
echo "${caption}: $*"
rm -f ${TESTDB_PREFIX}* \
&& ./mdbx_test --pathname=${TESTDB_PREFIX}db "$@" | lz4 > ${TESTDB_PREFIX}log.lz4 \
&& ./mdbx_chk -nvvv ${TESTDB_PREFIX}db | tee ${TESTDB_PREFIX}chk \
&& ./mdbx_chk -nvvv ${TESTDB_PREFIX}db-copy | tee ${TESTDB_PREFIX}chk-copy \
|| (echo "FAILED"; exit 1)
}
###############################################################################
count=0
for nops in {2..7}; do
for ((wbatch=nops-1; wbatch > 0; --wbatch)); do
loops=$(((3333 >> nops) / nops + 1))
for ((rep=0; rep++ < loops; )); do
for ((bits=2**${#options[@]}; --bits >= 0; )); do
seed=$(date +%N)
caption="Probe #$((++count)) int-key,w/o-dups, repeat ${rep} of ${loops}" probe \
--pagesize=min --size=6G --table=+key.integer,-data.dups --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \
--nops=$( rep9 $nops ) --batch.write=$( rep9 $wbatch ) --mode=$(bits2list options $bits) \
--keygen.seed=${seed} basic
caption="Probe #$((++count)) int-key,with-dups, repeat ${rep} of ${loops}" probe \
--pagesize=min --size=6G --table=+key.integer,+data.dups --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$( rep9 $nops ) --batch.write=$( rep9 $wbatch ) --mode=$(bits2list options $bits) \
--keygen.seed=${seed} basic
caption="Probe #$((++count)) int-key,int-data, repeat ${rep} of ${loops}" probe \
--pagesize=min --size=6G --table=+key.integer,+data.integer --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$( rep9 $nops ) --batch.write=$( rep9 $wbatch ) --mode=$(bits2list options $bits) \
--keygen.seed=${seed} basic
caption="Probe #$((++count)) w/o-dups, repeat ${rep} of ${loops}" probe \
--pagesize=min --size=6G --table=-data.dups --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \
--nops=$( rep9 $nops ) --batch.write=$( rep9 $wbatch ) --mode=$(bits2list options $bits) \
--keygen.seed=${seed} basic
caption="Probe #$((++count)) with-dups, repeat ${rep} of ${loops}" probe \
--pagesize=min --size=6G --table=+data.dups --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$( rep9 $nops ) --batch.write=$( rep9 $wbatch ) --mode=$(bits2list options $bits) \
--keygen.seed=${seed} basic
done
done
done
done
echo "=== ALL DONE ====================== $(date)"

View File

@@ -15,11 +15,12 @@
#include "test.h"
bool testcase_hill::run() {
db_open();
txn_begin(false);
MDBX_dbi dbi = db_table_open(true);
txn_end(false);
MDBX_dbi dbi;
int err = db_open__begin__table_create_open_clean(dbi);
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("hill: bailout-prepare due '%s'", mdbx_strerror(err));
return true;
}
/* LY: the "hill" test:
* - first we fill the table with cyclic CRUD manipulations,
@@ -59,14 +60,15 @@ bool testcase_hill::run() {
: MDBX_NODUPDATA;
uint64_t serial_count = 0;
uint64_t commited_serial = serial_count;
unsigned txn_nops = 0;
if (!txn_guard)
txn_begin(false);
while (should_continue()) {
const keygen::serial_t a_serial = serial_count;
if (unlikely(!keyvalue_maker.increment(serial_count, 1)))
failure("uphill: unexpected key-space overflow");
if (unlikely(!keyvalue_maker.increment(serial_count, 1))) {
log_notice("uphill: unexpected key-space overflow");
break;
}
const keygen::serial_t b_serial = serial_count;
assert(b_serial > a_serial);
@@ -76,26 +78,52 @@ bool testcase_hill::run() {
log_trace("uphill: insert-a (age %" PRIu64 ") %" PRIu64, age_shift,
a_serial);
generate_pair(a_serial, a_key, a_data_1, age_shift);
int rc = mdbx_put(txn_guard.get(), dbi, &a_key->value, &a_data_1->value,
insert_flags);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_put(insert-a.1)", rc);
err = mdbx_put(txn_guard.get(), dbi, &a_key->value, &a_data_1->value,
insert_flags);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("uphill: bailout at insert-a due '%s'", mdbx_strerror(err));
txn_restart(true, false);
serial_count = commited_serial;
break;
}
failure_perror("mdbx_put(insert-a.1)", err);
}
if (++txn_nops >= config.params.batch_write) {
txn_restart(false, false);
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("uphill: bailout at commit due '%s'", mdbx_strerror(err));
serial_count = commited_serial;
break;
}
commited_serial = a_serial;
txn_nops = 0;
}
// create the second record of the pair
log_trace("uphill: insert-b %" PRIu64, b_serial);
generate_pair(b_serial, b_key, b_data, 0);
rc = mdbx_put(txn_guard.get(), dbi, &b_key->value, &b_data->value,
insert_flags);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_put(insert-b)", rc);
err = mdbx_put(txn_guard.get(), dbi, &b_key->value, &b_data->value,
insert_flags);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("uphill: bailout at insert-b due '%s'", mdbx_strerror(err));
txn_restart(true, false);
serial_count = commited_serial;
break;
}
failure_perror("mdbx_put(insert-b)", err);
}
if (++txn_nops >= config.params.batch_write) {
txn_restart(false, false);
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("uphill: bailout at commit due '%s'", mdbx_strerror(err));
serial_count = commited_serial;
break;
}
commited_serial = a_serial;
txn_nops = 0;
}
@@ -104,25 +132,51 @@ bool testcase_hill::run() {
a_serial);
generate_pair(a_serial, a_key, a_data_0, 0);
checkdata("uphill: update-a", dbi, a_key->value, a_data_1->value);
rc = mdbx_replace(txn_guard.get(), dbi, &a_key->value, &a_data_0->value,
&a_data_1->value, update_flags);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_replace(update-a: 1->0)", rc);
err = mdbx_replace(txn_guard.get(), dbi, &a_key->value, &a_data_0->value,
&a_data_1->value, update_flags);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("uphill: bailout at update-a due '%s'", mdbx_strerror(err));
txn_restart(true, false);
serial_count = commited_serial;
break;
}
failure_perror("mdbx_replace(update-a: 1->0)", err);
}
if (++txn_nops >= config.params.batch_write) {
txn_restart(false, false);
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("uphill: bailout at commit due '%s'", mdbx_strerror(err));
serial_count = commited_serial;
break;
}
commited_serial = a_serial;
txn_nops = 0;
}
// delete the second record
log_trace("uphill: delete-b %" PRIu64, b_serial);
checkdata("uphill: delete-b", dbi, b_key->value, b_data->value);
rc = mdbx_del(txn_guard.get(), dbi, &b_key->value, &b_data->value);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_del(b)", rc);
err = mdbx_del(txn_guard.get(), dbi, &b_key->value, &b_data->value);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("uphill: bailout at delete-b due '%s'", mdbx_strerror(err));
txn_restart(true, false);
serial_count = commited_serial;
break;
}
failure_perror("mdbx_del(b)", err);
}
if (++txn_nops >= config.params.batch_write) {
txn_restart(false, false);
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("uphill: bailout at commit due '%s'", mdbx_strerror(err));
serial_count = commited_serial;
break;
}
commited_serial = a_serial;
txn_nops = 0;
}
@@ -134,7 +188,7 @@ bool testcase_hill::run() {
}
}
while (serial_count > 0) {
while (serial_count > 1) {
if (unlikely(!keyvalue_maker.increment(serial_count, -2)))
failure("downhill: unexpected key-space underflow");
@@ -150,26 +204,48 @@ bool testcase_hill::run() {
generate_pair(a_serial, a_key, a_data_0, 0);
generate_pair(a_serial, a_key, a_data_1, age_shift);
checkdata("downhill: update-a", dbi, a_key->value, a_data_0->value);
int rc = mdbx_replace(txn_guard.get(), dbi, &a_key->value, &a_data_1->value,
&a_data_0->value, update_flags);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_put(update-a: 0->1)", rc);
err = mdbx_replace(txn_guard.get(), dbi, &a_key->value, &a_data_1->value,
&a_data_0->value, update_flags);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("downhill: bailout at update-a due '%s'",
mdbx_strerror(err));
txn_end(true);
break;
}
failure_perror("mdbx_put(update-a: 0->1)", err);
}
if (++txn_nops >= config.params.batch_write) {
txn_restart(false, false);
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("downhill: bailout at commit due '%s'", mdbx_strerror(err));
break;
}
txn_nops = 0;
}
// create the second record of the pair
log_trace("downhill: insert-b %" PRIu64, b_serial);
generate_pair(b_serial, b_key, b_data, 0);
rc = mdbx_put(txn_guard.get(), dbi, &b_key->value, &b_data->value,
insert_flags);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_put(insert-b)", rc);
err = mdbx_put(txn_guard.get(), dbi, &b_key->value, &b_data->value,
insert_flags);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("downhill: bailout at insert-a due '%s'",
mdbx_strerror(err));
txn_end(true);
break;
}
failure_perror("mdbx_put(insert-b)", err);
}
if (++txn_nops >= config.params.batch_write) {
txn_restart(false, false);
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("downhill: bailout at commit due '%s'", mdbx_strerror(err));
break;
}
txn_nops = 0;
}
@@ -177,38 +253,67 @@ bool testcase_hill::run() {
log_trace("downhill: delete-a (age %" PRIu64 ") %" PRIu64, age_shift,
a_serial);
checkdata("downhill: delete-a", dbi, a_key->value, a_data_1->value);
rc = mdbx_del(txn_guard.get(), dbi, &a_key->value, &a_data_1->value);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_del(a)", rc);
err = mdbx_del(txn_guard.get(), dbi, &a_key->value, &a_data_1->value);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("downhill: bailout at delete-a due '%s'",
mdbx_strerror(err));
txn_end(true);
break;
}
failure_perror("mdbx_del(a)", err);
}
if (++txn_nops >= config.params.batch_write) {
txn_restart(false, false);
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("downhill: bailout at commit due '%s'", mdbx_strerror(err));
break;
}
txn_nops = 0;
}
// delete the second record
log_trace("downhill: delete-b %" PRIu64, b_serial);
checkdata("downhill: delete-b", dbi, b_key->value, b_data->value);
rc = mdbx_del(txn_guard.get(), dbi, &b_key->value, &b_data->value);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_del(b)", rc);
err = mdbx_del(txn_guard.get(), dbi, &b_key->value, &b_data->value);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("downhill: bailout at delete-b due '%s'",
mdbx_strerror(err));
txn_end(true);
break;
}
failure_perror("mdbx_del(b)", err);
}
if (++txn_nops >= config.params.batch_write) {
txn_restart(false, false);
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("downhill: bailout at commit due '%s'", mdbx_strerror(err));
break;
}
txn_nops = 0;
}
report(1);
}
if (txn_guard)
txn_end(false);
if (txn_guard) {
err = breakable_commit();
if (unlikely(err != MDBX_SUCCESS))
log_notice("downhill: bailout at commit due '%s'", mdbx_strerror(err));
}
if (dbi) {
if (config.params.drop_table && !mode_readonly()) {
txn_begin(false);
db_table_drop(dbi);
txn_end(false);
err = breakable_commit();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("hill: bailout-clean due '%s'", mdbx_strerror(err));
return true;
}
} else
db_table_close(dbi);
}

View File

@@ -1,4 +1,4 @@
/*
/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -167,7 +167,16 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id,
base = 0;
}
bool maker::increment(serial_t &serial, int delta) {
/* Resets both the `rotate` and `mesh` fields of the key mapping to zero.
 * NOTE(review): presumably this switches off key scrambling so generated keys
 * follow the serial order -- confirm against the code that consumes `mapping`. */
void maker::make_ordered() {
  mapping.rotate = 0;
  mapping.mesh = 0;
}
/* Reports whether the mapping is in a non-ordered configuration: true when
 * `mapping.rotate` is non-zero or `mapping.mesh` is at least `serial_minwith`. */
bool maker::is_unordered() const {
  if (mapping.rotate)
    return true;
  return mapping.mesh >= serial_minwith;
}
bool maker::increment(serial_t &serial, int delta) const {
if (serial > mask(mapping.width)) {
log_extra("keygen-increment: %" PRIu64 " > %" PRIu64 ", overflow", serial,
mask(mapping.width));
@@ -175,7 +184,8 @@ bool maker::increment(serial_t &serial, int delta) {
}
serial_t target = serial + (int64_t)delta;
if (target > mask(mapping.width)) {
if (target > mask(mapping.width) ||
((delta > 0) ? target < serial : target > serial)) {
log_extra("keygen-increment: %" PRIu64 "%-d => %" PRIu64 ", overflow",
serial, delta, target);
return false;

View File

@@ -1,4 +1,4 @@
/*
/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -121,8 +121,10 @@ public:
serial_t value_age);
void setup(const config::actor_params_pod &actor, unsigned actor_id,
unsigned thread_number);
void make_ordered();
bool is_unordered() const;
bool increment(serial_t &serial, int delta);
bool increment(serial_t &serial, int delta) const;
};
} /* namespace keygen */

108
test/long_stochastic.sh Executable file
View File

@@ -0,0 +1,108 @@
#!/bin/bash
# Preamble of the long stochastic test: picks the DB path prefix, sizes the
# database from the currently available RAM, then builds and runs `make check`.
# Usage: long_stochastic.sh [TESTDB_PREFIX]   (default: /dev/shm/mdbx-gc-test)
set -euo pipefail
TESTDB_PREFIX=${1:-/dev/shm/mdbx-gc-test}.

# Remove only the files that belong to this test (everything that starts with
# the prefix). Deleting "$(dirname ...)/*" would wipe unrelated files too,
# e.g. the whole content of /dev/shm with the default prefix.
rm -f "${TESTDB_PREFIX}"*

# Take one snapshot of `free` and parse it, instead of piping `free` into
# `grep -q`: under `pipefail` an early exit of grep may fail the pipeline.
free_output=$(LC_ALL=C free)
if grep -q -i available <<< "${free_output}"; then
  # A release of `free` which prints the "available" column (7th field).
  ram_column=7
else
  # Otherwise fall back to the 4th field of the "Mem:" line.
  ram_column=4
fi
# `free` reports KiB by default; convert to MiB.
ram_avail_mb=$(( $(grep -i Mem: <<< "${free_output}" | tr -s '[:blank:]' ' ' | cut -d ' ' -f "${ram_column}") / 1024 ))

ram_reserve4logs_mb=3333
if [ ${ram_avail_mb} -lt ${ram_reserve4logs_mb} ]; then
  echo "=== At least ${ram_reserve4logs_mb}Mb RAM required"
  exit -2
fi

#
# In non-MDBX_WRITEMAP modes, updates (dirty pages) accumulate in memory
# before being written to disk, which may require free memory up to the
# size of the whole database. In addition, the test includes a scenario
# which creates a copy of the database on the fly. Therefore, the database
# cannot be larger than 1/3 of the available memory. Moreover, it should be
# taken into account that malloc() will not return the allocated memory to
# the system immediately, and some space is required for the logs as well.
#
# Shell arithmetic is used instead of `expr`, since `expr` exits with
# status 1 when the result is 0, which would abort the script under `set -e`.
db_size_mb=$(( (ram_avail_mb - ram_reserve4logs_mb) / 4 ))
if [ ${db_size_mb} -gt 3072 ]; then
  db_size_mb=3072
fi
echo "=== ${ram_avail_mb}M RAM available, use ${db_size_mb}M for DB"

make check
# Clean up again after `make check`, again touching only our own files.
rm -f "${TESTDB_PREFIX}"*
###############################################################################
# rep9 N: prints the digit '9' repeated N times (N spaces produced by printf
# padding are translated to '9'), e.g. `rep9 3` prints 999.
function rep9 { printf "%*s" $1 '' | tr ' ' '9'; }
# join SEP ARGS...: prints ARGS joined into one line, using SEP as the local IFS
# (so "$*" is expanded with the first character of SEP between the items).
function join { local IFS="$1"; shift; echo "$*"; }
# bit2option ARRAY_NAME BITS INDEX: prints '+' when bit INDEX is set in BITS and
# '-' otherwise, followed by the INDEX-th element of the array named ARRAY_NAME
# (accessed through a nameref).
function bit2option { local -n arr=$1; (( ($2&(1<<$3)) != 0 )) && echo -n '+' || echo -n '-'; echo "${arr[$3]}"; }
options=(writemap coalesce lifo)
# bits2list ARRAY_NAME BITS: for every element of the array named ARRAY_NAME
# builds a "+name"/"-name" item via bit2option (bit i of BITS selects the sign
# of the i-th element) and prints all items joined with commas.
function bits2list {
local -n arr=$1
local i
local list=()
for ((i=0; i<${#arr[@]}; ++i)) do
list[$i]=$(bit2option $1 $2 $i)
done
join , "${list[@]}"
}
# probe ARGS...: runs one test probe. Expects the `caption` variable to be set
# by the caller. Steps, chained with `&&` so that each one runs only when the
# previous one succeeded:
#  1. remove the files left with the TESTDB_PREFIX by a previous probe;
#  2. run mdbx_test with the given ARGS, storing its output compressed by lz4;
#  3. verify the resulting DB with mdbx_chk;
#  4. verify the DB copy with mdbx_chk too, but only if such a copy exists.
# If any step fails, "FAILED" is printed and the trailing subshell exits
# with status 1, which becomes the status of the whole command list.
function probe {
echo "=============================================== $(date)"
echo "${caption}: $*"
rm -f ${TESTDB_PREFIX}* \
&& ./mdbx_test --ignore-dbfull --repeat=42 --pathname=${TESTDB_PREFIX}db "$@" | lz4 > ${TESTDB_PREFIX}log.lz4 \
&& ./mdbx_chk -nvvv ${TESTDB_PREFIX}db | tee ${TESTDB_PREFIX}chk \
&& ([ ! -e ${TESTDB_PREFIX}db-copy ] || ./mdbx_chk -nvvv ${TESTDB_PREFIX}db-copy | tee ${TESTDB_PREFIX}chk-copy) \
|| (echo "FAILED"; exit 1)
}
###############################################################################
# Main driver: enumerates probe configurations.
#  - nops:   number of '9' digits in --nops (2..6 digits, i.e. 99 .. 999999);
#  - wbatch: number of '9' digits in --batch.write, from nops-1 down to 1;
#  - loops:  number of repetitions per (nops, wbatch) pair, fewer for larger nops;
#  - bits:   bitmask over the `options` array, counting down from
#            2**(number of options) - 1 to 0, i.e. every +/- combination.
# `count` numbers the probes for the captions.
count=0
for nops in $(seq 2 6); do
for ((wbatch=nops-1; wbatch > 0; --wbatch)); do
loops=$(((111 >> nops) / nops + 3))
for ((rep=0; rep++ < loops; )); do
for ((bits=2**${#options[@]}; --bits >= 0; )); do
# The nanoseconds part of the current time is used as the keygen seed;
# the same seed is shared by the five table-layout probes below.
seed=$(date +%N)
# Integer keys, no duplicates.
caption="Probe #$((++count)) int-key,w/o-dups, repeat ${rep} of ${loops}" probe \
--pagesize=min --size-upper=${db_size_mb}M --table=+key.integer,-data.dups --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \
--nops=$( rep9 $nops ) --batch.write=$( rep9 $wbatch ) --mode=$(bits2list options $bits) \
--keygen.seed=${seed} basic
# Integer keys, with duplicates.
caption="Probe #$((++count)) int-key,with-dups, repeat ${rep} of ${loops}" probe \
--pagesize=min --size-upper=${db_size_mb}M --table=+key.integer,+data.dups --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$( rep9 $nops ) --batch.write=$( rep9 $wbatch ) --mode=$(bits2list options $bits) \
--keygen.seed=${seed} basic
# Integer keys, integer data.
caption="Probe #$((++count)) int-key,int-data, repeat ${rep} of ${loops}" probe \
--pagesize=min --size-upper=${db_size_mb}M --table=+key.integer,+data.integer --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$( rep9 $nops ) --batch.write=$( rep9 $wbatch ) --mode=$(bits2list options $bits) \
--keygen.seed=${seed} basic
# Default (non-integer) keys, no duplicates.
caption="Probe #$((++count)) w/o-dups, repeat ${rep} of ${loops}" probe \
--pagesize=min --size-upper=${db_size_mb}M --table=-data.dups --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \
--nops=$( rep9 $nops ) --batch.write=$( rep9 $wbatch ) --mode=$(bits2list options $bits) \
--keygen.seed=${seed} basic
# Default (non-integer) keys, with duplicates.
caption="Probe #$((++count)) with-dups, repeat ${rep} of ${loops}" probe \
--pagesize=min --size-upper=${db_size_mb}M --table=+data.dups --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \
--nops=$( rep9 $nops ) --batch.write=$( rep9 $wbatch ) --mode=$(bits2list options $bits) \
--keygen.seed=${seed} basic
done
done
done
done
echo "=== ALL DONE ====================== $(date)"

View File

@@ -1,15 +0,0 @@
@echo off
del test.db test.db-lck
:loop
mdbx_test.exe --pathname=test.db --dont-cleanup-after basic > test.log
if errorlevel 1 goto fail
mdbx_chk.exe -nvvv test.db > chk.log
if errorlevel 1 goto fail
goto loop
:fail
echo FAILED

View File

@@ -27,6 +27,8 @@ void actor_params::set_defaults(const std::string &tmpdir) {
loglevel =
#ifdef NDEBUG
logging::info;
#elif defined(_WIN32) || defined(_WIN64)
logging::verbose;
#else
logging::trace;
#endif
@@ -37,7 +39,8 @@ void actor_params::set_defaults(const std::string &tmpdir) {
table_flags = MDBX_DUPSORT;
size_lower = -1;
size_now = 1024 * 1024 * ((table_flags & MDBX_DUPSORT) ? 4 : 256);
size_now =
intptr_t(1024) * 1024 * ((table_flags & MDBX_DUPSORT) ? 256 : 1024);
size_upper = -1;
shrink_threshold = -1;
growth_step = -1;
@@ -61,14 +64,15 @@ void actor_params::set_defaults(const std::string &tmpdir) {
datalen_min = mdbx_datalen_min();
datalen_max = std::min(mdbx_datalen_max(), 256u * 1024 + 42);
batch_read = 4;
batch_write = 4;
batch_read = 42;
batch_write = 42;
delaystart = 0;
waitfor_nops = 0;
inject_writefaultn = 0;
drop_table = false;
ignore_dbfull = false;
max_readers = 42;
max_tables = 42;
@@ -287,6 +291,9 @@ int main(int argc, char *const argv[]) {
continue;
if (config::parse_option(argc, argv, narg, "drop", params.drop_table))
continue;
if (config::parse_option(argc, argv, narg, "ignore-dbfull",
params.ignore_dbfull))
continue;
if (config::parse_option(argc, argv, narg, "dump-config",
global::config::dump_config))
continue;
@@ -341,6 +348,14 @@ int main(int argc, char *const argv[]) {
configure_actor(last_space_id, ac_copy, value, params);
continue;
}
if (config::parse_option(argc, argv, narg, "append", nullptr)) {
configure_actor(last_space_id, ac_append, value, params);
continue;
}
if (config::parse_option(argc, argv, narg, "ttl", nullptr)) {
configure_actor(last_space_id, ac_ttl, value, params);
continue;
}
if (config::parse_option(argc, argv, narg, "failfast",
global::config::failfast))
continue;

View File

@@ -199,7 +199,9 @@ retry:
if (WIFEXITED(status))
childs[pid] =
(WEXITSTATUS(status) == EXIT_SUCCESS) ? as_successful : as_failed;
else if (WIFSIGNALED(status) || WCOREDUMP(status))
else if (WCOREDUMP(status))
childs[pid] = as_coredump;
else if (WIFSIGNALED(status))
childs[pid] = as_killed;
else if (WIFSTOPPED(status))
childs[pid] = as_debuging;
@@ -216,7 +218,7 @@ retry:
if (ts.tv_sec == 0 && ts.tv_nsec == 0)
ts.tv_nsec = 1;
if (nanosleep(&ts, &ts) == 0) {
/* timeout and no signal fomr child */
/* timeout and no signal from child */
pid = 0;
return 0;
}

View File

@@ -312,14 +312,22 @@ actor_status osal_actor_info(const mdbx_pid_t pid) {
case EXIT_SUCCESS:
status = as_successful;
break;
// case EXCEPTION_BREAKPOINT:
case EXCEPTION_BREAKPOINT:
case EXCEPTION_SINGLE_STEP:
status = as_debuging;
break;
case STATUS_CONTROL_C_EXIT:
case EXCEPTION_NONCONTINUABLE_EXCEPTION:
status = as_killed;
break;
case EXCEPTION_ACCESS_VIOLATION:
case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
case EXCEPTION_DATATYPE_MISALIGNMENT:
case EXCEPTION_STACK_OVERFLOW:
case EXCEPTION_INVALID_DISPOSITION:
case EXCEPTION_ILLEGAL_INSTRUCTION:
case EXCEPTION_NONCONTINUABLE_EXCEPTION:
status = as_coredump;
break;
default:
status = as_failed;
break;

View File

@@ -33,6 +33,10 @@ const char *testcase2str(const actor_testcase testcase) {
return "try";
case ac_copy:
return "copy";
case ac_append:
return "append";
case ac_ttl:
return "ttl";
}
}
@@ -51,6 +55,8 @@ const char *status2str(actor_status status) {
return "killed";
case as_failed:
return "failed";
case as_coredump:
return "coredump";
}
}
@@ -133,6 +139,8 @@ void testcase::db_open() {
if (!db_guard)
db_prepare();
jitter_delay(true);
int rc = mdbx_env_open(db_guard.get(), config.params.pathname_db.c_str(),
(unsigned)config.params.mode_flags, 0640);
if (unlikely(rc != MDBX_SUCCESS))
@@ -166,28 +174,84 @@ void testcase::txn_begin(bool readonly, unsigned flags) {
flags);
}
int testcase::breakable_commit() {
int rc = MDBX_SUCCESS;
log_trace(">> txn_commit");
assert(txn_guard);
MDBX_txn *txn = txn_guard.release();
txn_inject_writefault(txn);
int err = mdbx_txn_commit(txn);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
rc = err;
err = mdbx_txn_abort(txn);
if (unlikely(err != MDBX_SUCCESS && err != MDBX_THREAD_MISMATCH))
failure_perror("mdbx_txn_abort()", err);
} else
failure_perror("mdbx_txn_commit()", err);
}
log_trace("<< txn_commit: %s", rc ? "failed" : "Ok");
return rc;
}
void testcase::txn_end(bool abort) {
log_trace(">> txn_end(%s)", abort ? "abort" : "commit");
assert(txn_guard);
MDBX_txn *txn = txn_guard.release();
if (abort) {
int rc = mdbx_txn_abort(txn);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_txn_abort()", rc);
int err = mdbx_txn_abort(txn);
if (unlikely(err != MDBX_SUCCESS && err != MDBX_THREAD_MISMATCH))
failure_perror("mdbx_txn_abort()", err);
} else {
txn_inject_writefault(txn);
int rc = mdbx_txn_commit(txn);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_txn_commit()", rc);
int err = mdbx_txn_commit(txn);
if (unlikely(err != MDBX_SUCCESS))
failure_perror("mdbx_txn_commit()", err);
}
log_trace("<< txn_end(%s)", abort ? "abort" : "commit");
}
void testcase::cursor_open(unsigned dbi) {
log_trace(">> cursor_open(%u)", dbi);
assert(!cursor_guard);
assert(txn_guard);
MDBX_cursor *cursor = nullptr;
int rc = mdbx_cursor_open(txn_guard.get(), dbi, &cursor);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_cursor_open()", rc);
cursor_guard.reset(cursor);
log_trace("<< cursor_open(%u)", dbi);
}
/* Releases the cursor held by `cursor_guard` and closes it explicitly.
 * Requires that a cursor is currently held. */
void testcase::cursor_close() {
  log_trace(">> cursor_close()");
  assert(cursor_guard);
  mdbx_cursor_close(cursor_guard.release());
  log_trace("<< cursor_close()");
}
/* Restarts the write transaction, tolerating a "DB is full" commit failure.
 *
 * Commits the current transaction (if any) via breakable_commit(), closes the
 * cursor (if any), then always begins a new read-write transaction.
 *
 * Returns the result of breakable_commit(), or MDBX_SUCCESS when there was
 * no transaction to commit. */
int testcase::breakable_restart() {
  const int commit_result = txn_guard ? breakable_commit() : MDBX_SUCCESS;
  if (cursor_guard)
    cursor_close();
  txn_begin(false, 0);
  return commit_result;
}
/* Ends the current transaction (if any) by abort or commit according to
 * `abort`, closes the cursor (if any), and begins a new transaction with the
 * given `readonly` mode and `flags`. */
void testcase::txn_restart(bool abort, bool readonly, unsigned flags) {
  if (txn_guard) {
    txn_end(abort);
  }
  if (cursor_guard) {
    cursor_close();
  }
  txn_begin(readonly, flags);
}
@@ -296,6 +360,7 @@ bool testcase::setup() {
return false;
start_timestamp = chrono::now_motonic();
nops_completed = 0;
return true;
}
@@ -365,6 +430,28 @@ void testcase::update_canary(uint64_t increment) {
log_trace("<< update_canary: sequence = %" PRIu64, canary_now.y);
}
/* Opens the DB and prepares an empty table for a test case.
 *
 * Each attempt begins a write transaction, opens (creating if needed) the
 * table, clears it and commits via breakable_commit(). On success a fresh
 * write transaction is begun and left open for the caller, `dbi` holds the
 * table handle and MDBX_SUCCESS is returned. On a tolerated commit failure
 * the attempt is repeated after a jitter delay, up to 42 attempts in total;
 * after the last one the error is logged and returned. */
int testcase::db_open__begin__table_create_open_clean(MDBX_dbi &dbi) {
  db_open();

  const int max_attempts = 42;
  int err = MDBX_SUCCESS;
  for (int attempt = 1;; ++attempt) {
    txn_begin(false);
    dbi = db_table_open(true);
    db_table_clear(dbi);
    err = breakable_commit();
    if (likely(err == MDBX_SUCCESS)) {
      txn_begin(false);
      return MDBX_SUCCESS;
    }
    if (attempt == max_attempts)
      break;
    jitter_delay(true);
  }

  log_notice("db_begin_table_create_open_clean: bailout due '%s'",
             mdbx_strerror(err));
  return err;
}
MDBX_dbi testcase::db_table_open(bool create) {
log_trace(">> testcase::db_table_create");
@@ -396,13 +483,21 @@ void testcase::db_table_drop(MDBX_dbi handle) {
if (config.params.drop_table) {
int rc = mdbx_drop(txn_guard.get(), handle, true);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_drop()", rc);
failure_perror("mdbx_drop(delete=true)", rc);
log_trace("<< testcase::db_table_drop");
} else {
log_trace("<< testcase::db_table_drop: not needed");
}
}
/* Calls mdbx_drop() on `handle` with the delete flag set to false, within the
 * current transaction; an error is reported through failure_perror(). */
void testcase::db_table_clear(MDBX_dbi handle) {
  log_trace(">> testcase::db_table_clear, handle %u", handle);
  const int err = mdbx_drop(txn_guard.get(), handle, false);
  if (unlikely(err != MDBX_SUCCESS))
    failure_perror("mdbx_drop(delete=false)", err);
  log_trace("<< testcase::db_table_clear");
}
void testcase::db_table_close(MDBX_dbi handle) {
log_trace(">> testcase::db_table_close, handle %u", handle);
assert(!txn_guard);
@@ -424,8 +519,9 @@ void testcase::checkdata(const char *step, MDBX_dbi handle, MDBX_val key2check,
//-----------------------------------------------------------------------------
bool test_execute(const actor_config &config) {
bool test_execute(const actor_config &config_const) {
const mdbx_pid_t pid = osal_getpid();
actor_config config = config_const;
if (global::singlemode) {
logging::setup(format("single_%s", testcase2str(config.testcase)));
@@ -458,23 +554,48 @@ bool test_execute(const actor_config &config) {
case ac_copy:
test.reset(new testcase_copy(config, pid));
break;
case ac_append:
test.reset(new testcase_append(config, pid));
break;
case ac_ttl:
test.reset(new testcase_ttl(config, pid));
break;
default:
test.reset(new testcase(config, pid));
break;
}
if (!test->setup())
log_notice("test setup failed");
else if (!test->run())
log_notice("test failed");
else if (!test->teardown())
log_notice("test teardown failed");
else {
log_info("test successed");
return true;
}
size_t iter = 0;
do {
iter++;
if (!test->setup()) {
log_notice("test setup failed");
return false;
}
if (!test->run()) {
log_notice("test failed");
return false;
}
if (!test->teardown()) {
log_notice("test teardown failed");
return false;
}
if (config.params.nrepeat == 1)
log_info("test successed");
else {
if (config.params.nrepeat)
log_info("test successed (iteration %zi of %zi)", iter,
size_t(config.params.nrepeat));
else
log_info("test successed (iteration %zi)", iter);
config.params.keygen.seed += INT32_C(0xA4F4D37B);
}
} while (config.params.nrepeat == 0 || iter < config.params.nrepeat);
return true;
} catch (const std::exception &pipets) {
failure("***** Exception: %s *****", pipets.what());
return false;
}
return false;
}

View File

@@ -105,8 +105,12 @@ protected:
void db_open();
void db_close();
void txn_begin(bool readonly, unsigned flags = 0);
int breakable_commit();
void txn_end(bool abort);
int breakable_restart();
void txn_restart(bool abort, bool readonly, unsigned flags = 0);
void cursor_open(unsigned dbi);
void cursor_close();
void txn_inject_writefault(void);
void txn_inject_writefault(MDBX_txn *txn);
void fetch_canary();
@@ -117,7 +121,9 @@ protected:
MDBX_dbi db_table_open(bool create);
void db_table_drop(MDBX_dbi handle);
void db_table_clear(MDBX_dbi handle);
void db_table_close(MDBX_dbi handle);
int db_open__begin__table_create_open_clean(MDBX_dbi &dbi);
bool wait4start();
void report(size_t nops_done);
@@ -151,6 +157,13 @@ public:
virtual ~testcase() {}
};
/* Test case emulating a time-to-live workload; instantiated by
 * test_execute() for `ac_ttl`. The scenario itself is implemented in run(). */
class testcase_ttl : public testcase {
public:
/* Passes the actor configuration and the process id to the base testcase. */
testcase_ttl(const actor_config &config, const mdbx_pid_t pid)
: testcase(config, pid) {}
/* Executes the scenario. */
bool run();
};
class testcase_hill : public testcase {
public:
testcase_hill(const actor_config &config, const mdbx_pid_t pid)
@@ -158,6 +171,13 @@ public:
bool run();
};
/* The "append" test case; instantiated by test_execute() for `ac_append`.
 * The scenario itself is implemented in run(). */
class testcase_append : public testcase {
public:
/* Passes the actor configuration and the process id to the base testcase. */
testcase_append(const actor_config &config, const mdbx_pid_t pid)
: testcase(config, pid) {}
/* Executes the scenario. */
bool run();
};
class testcase_deadread : public testcase {
public:
testcase_deadread(const actor_config &config, const mdbx_pid_t pid)

View File

@@ -181,6 +181,8 @@
<ClInclude Include="utils.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="ttl.cc" />
<ClCompile Include="append.cc" />
<ClCompile Include="cases.cc" />
<ClCompile Include="chrono.cc" />
<ClCompile Include="config.cc" />

164
test/ttl.cc Normal file
View File

@@ -0,0 +1,164 @@
/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted only as authorized by the OpenLDAP
* Public License.
*
* A copy of this license is available in the file LICENSE in the
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>.
*/
#include "test.h"
#include <cmath>
#include <deque>
/* Derives a pseudo-random window width from `edge`:
 *   window_max - round(window_max ^ rnd),
 * where `rnd` is obtained from bleach64(edge) via u64_to_double1().
 * NOTE(review): assumes u64_to_double1() yields a value within [0, 1], so the
 * result stays within [0, window_max - 1] -- confirm against its definition. */
static unsigned edge2window(uint64_t edge, unsigned window_max) {
  const uint64_t scrambled = bleach64(edge);
  const double exponent = u64_to_double1(scrambled);
  const auto shrink = std::lrint(std::pow(window_max, exponent));
  return window_max - shrink;
}
/* Derives a pseudo-random record count from `edge`:
 *   round(count_max ^ rnd),
 * where `rnd` is obtained from prng64_map1_white(edge) via u64_to_double1().
 * NOTE(review): assumes u64_to_double1() yields a value within [0, 1], so the
 * result stays within [1, count_max] -- confirm against its definition. */
static unsigned edge2count(uint64_t edge, unsigned count_max) {
  const uint64_t scrambled = prng64_map1_white(edge);
  const double exponent = u64_to_double1(scrambled);
  return std::lrint(std::pow(count_max, exponent));
}
bool testcase_ttl::run() {
MDBX_dbi dbi;
int err = db_open__begin__table_create_open_clean(dbi);
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("ttl: bailout-prepare due '%s'", mdbx_strerror(err));
return true;
}
/* LY: the "time-to-live emulation" test:
* - a "sliding window" is maintained, which moves forward along
* the number axis on every transaction.
* - records are added to the table at the leading edge of the
* "sliding window" and deleted at its trailing edge.
* - the number of records added/deleted depends pseudo-randomly
* on the transaction number, with an exponential distribution.
* - the width of the "sliding window" also depends pseudo-randomly on
* the transaction number, with a "negative" exponential distribution
* MAX_WIDTH - exp(rnd(N)); when the window shrinks, the trailing
* edge is moved and the records behind it are deleted.
*
* This imitates the behavior of a table with TTL: records are added and
* deleted stochastically, but occasionally massive deletions happen.
*/
/* LY: for parameterization we reuse suitable parameters, which have no
* meaning here in their original sense */
const unsigned window_max =
(config.params.batch_read > 999) ? config.params.batch_read : 1000;
const unsigned count_max =
(config.params.batch_write > 999) ? config.params.batch_write : 1000;
log_info("ttl: using `batch_read` value %u for window_max", window_max);
log_info("ttl: using `batch_write` value %u for count_max", count_max);
uint64_t seed =
prng64_map2_white(config.params.keygen.seed) + config.actor_id;
keyvalue_maker.setup(config.params, config.actor_id, 0 /* thread_number */);
key = keygen::alloc(config.params.keylen_max);
data = keygen::alloc(config.params.datalen_max);
const unsigned insert_flags = (config.params.table_flags & MDBX_DUPSORT)
? MDBX_NODUPDATA
: MDBX_NODUPDATA | MDBX_NOOVERWRITE;
std::deque<std::pair<uint64_t, unsigned>> fifo;
uint64_t serial = 0;
while (should_continue()) {
const uint64_t salt = prng64_white(seed) /* mdbx_txn_id(txn_guard.get()) */;
const unsigned window_width = edge2window(salt, window_max);
unsigned head_count = edge2count(salt, count_max);
log_verbose("ttl: step #%zu (serial %" PRIu64
", window %u, count %u) salt %" PRIu64,
nops_completed, serial, window_width, head_count, salt);
if (window_width) {
while (fifo.size() > window_width) {
uint64_t tail_serial = fifo.back().first;
const unsigned tail_count = fifo.back().second;
log_trace("ttl: pop-tail (serial %" PRIu64 ", count %u)", tail_serial,
tail_count);
fifo.pop_back();
for (unsigned n = 0; n < tail_count; ++n) {
log_trace("ttl: remove-tail %" PRIu64, serial);
generate_pair(tail_serial);
err = mdbx_del(txn_guard.get(), dbi, &key->value, &data->value);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("ttl: tail-bailout due '%s'", mdbx_strerror(err));
goto bailout;
}
failure_perror("mdbx_del(tail)", err);
}
if (unlikely(!keyvalue_maker.increment(tail_serial, 1)))
failure("ttl: unexpected key-space overflow on the tail");
}
}
} else {
log_trace("ttl: purge state");
db_table_clear(dbi);
fifo.clear();
}
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("ttl: bailout at commit due '%s'", mdbx_strerror(err));
break;
}
fifo.push_front(std::make_pair(serial, head_count));
retry:
for (unsigned n = 0; n < head_count; ++n) {
log_trace("ttl: insert-head %" PRIu64, serial);
generate_pair(serial);
err = mdbx_put(txn_guard.get(), dbi, &key->value, &data->value,
insert_flags);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("ttl: head-insert skip due '%s'", mdbx_strerror(err));
txn_restart(true, false);
serial = fifo.front().first;
fifo.front().second = head_count = n;
goto retry;
}
failure_perror("mdbx_put(head)", err);
}
if (unlikely(!keyvalue_maker.increment(serial, 1))) {
log_notice("ttl: unexpected key-space overflow");
goto bailout;
}
}
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("ttl: head-commit skip due '%s'", mdbx_strerror(err));
serial = fifo.front().first;
fifo.pop_front();
}
report(1);
}
bailout:
txn_end(true);
if (dbi) {
if (config.params.drop_table && !mode_readonly()) {
txn_begin(false);
db_table_drop(dbi);
err = breakable_commit();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("ttl: bailout-clean due '%s'", mdbx_strerror(err));
return true;
}
} else
db_table_close(dbi);
}
return true;
}

View File

@@ -252,21 +252,8 @@ uint64_t entropy_ticks(void) {
//-----------------------------------------------------------------------------
/* Mixes a 64-bit value: byte-swaps it and returns mul_64x64_high() of the
 * swapped value and a fixed 64-bit multiplier. */
static __inline uint64_t bleach64(uint64_t dirty) {
  const uint64_t multiplier = UINT64_C(17048867929148541611);
  return mul_64x64_high(bswap64(dirty), multiplier);
}
/* Mixes a 32-bit value: byte-swaps it, multiplies by a fixed constant in
 * 64-bit arithmetic and returns bits 32..63 of the product. */
static __inline uint32_t bleach32(uint32_t dirty) {
  const uint64_t product = bswap32(dirty) * UINT64_C(2175734609);
  return (uint32_t)(product >> 32);
}
/* Advances `state` in place by one linear-congruential step
 * (state * 6364136223846793005 + 1, modulo 2^64) and returns the new state
 * without any further mixing. */
uint64_t prng64_careless(uint64_t &state) {
  const uint64_t next = state * UINT64_C(6364136223846793005) + 1;
  state = next;
  return next;
}
/* Advances `state` in place and returns bleach64() of the updated state.
 *
 * NOTE(review): as shown here `state` is updated twice in a row (an explicit
 * multiply-add step, then prng64_map2_careless()). This text comes from a
 * diff hunk, so the first assignment may be the pre-change line shown next
 * to its replacement; confirm which update(s) are intended. */
uint64_t prng64_white(uint64_t &state) {
state = state * UINT64_C(6364136223846793005) + UINT64_C(1442695040888963407);
state = prng64_map2_careless(state);
return bleach64(state);
}
@@ -366,7 +353,7 @@ void jitter_delay(bool extra) {
cpu_relax();
if (dice > 2) {
unsigned us = entropy_white() &
(extra ? 0xfffff /* 1.05 s */ : 0x3ff /* 1 ms */);
(extra ? 0xffff /* 656 ms */ : 0x3ff /* 1 ms */);
log_trace("== jitter.delay: %0.6f", us / 1000000.0);
osal_udelay(us);
}

View File

@@ -1,4 +1,4 @@
/*
/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -288,6 +288,7 @@ struct simple_checksum {
/* Folds a 32-bit value into the running checksum `value`: adds
 * `data * 9386433910765580089 + 1` (the UINT64_C constant makes the product
 * 64-bit), xors `value` with itself shifted right by 41 bits, then multiplies
 * by a second fixed constant. */
void push(uint32_t data) {
value += data * UINT64_C(9386433910765580089) + 1;
value ^= value >> 41;
value *= UINT64_C(0xBD9CACC22C6E9571);
}
void push(uint64_t data) {
@@ -304,11 +305,15 @@ struct simple_checksum {
}
/* Folds the object representation (sizeof(double) bytes) of `data`. */
void push(const double &data) { push(&data, sizeof(double)); }
/* Folds the characters of a C string, excluding the terminating NUL. */
void push(const char *cstr) { push(cstr, strlen(cstr)); }
/* Folds the str.size() bytes of `str` starting at str.data(). */
void push(const std::string &str) { push(str.data(), str.size()); }
/* Folds a salted MDBX_val into the checksum: first the length
 * (val.iov_len), then `salt`, then the iov_len bytes at val.iov_base.
 * Each call is dispatched to the push() overload matching its argument
 * type. */
void push(unsigned salt, const MDBX_val &val) {
push(val.iov_len);
push(salt);
push(val.iov_base, val.iov_len);
}
#if defined(_WIN32) || defined(_WIN64) || defined(_WINDOWS)
/* Windows-only overload: folds the sizeof(handle) bytes of the HANDLE
 * object itself. */
void push(const HANDLE &handle) { push(&handle, sizeof(handle)); }
#endif /* _WINDOWS */
@@ -322,7 +327,60 @@ std::string format(const char *fmt, ...);
uint64_t entropy_ticks(void);
uint64_t entropy_white(void);
uint64_t prng64_careless(uint64_t &state);
/* 64-bit mixer: two rounds of "xor with two rotations, then multiply by a
 * constant", followed by a final xor with the value shifted right by 28. */
static inline uint64_t bleach64(uint64_t v) {
  // Tommy Ettinger, https://www.blogger.com/profile/04953541827437796598
  // http://mostlymangling.blogspot.com/2019/01/better-stronger-mixer-and-test-procedure.html
  const uint64_t round1 =
      (v ^ rot64(v, 25) ^ rot64(v, 50)) * UINT64_C(0xA24BAED4963EE407);
  const uint64_t round2 = (round1 ^ rot64(round1, 24) ^ rot64(round1, 49)) *
                          UINT64_C(0x9FB21C651E98DF25);
  return round2 ^ (round2 >> 28);
}
/* 32-bit mixer: xor-shift / multiply rounds with an extra constant xored in
 * during the middle round. */
static inline uint32_t bleach32(uint32_t x) {
  // https://github.com/skeeto/hash-prospector
  // exact bias: 0.17353355999581582
  x = (x ^ (x >> 16)) * UINT32_C(0x7feb352d);
  x = (x ^ 0x3027C563 ^ (x >> 15)) * UINT32_C(0x846ca68b);
  return x ^ (x >> 16);
}
/* Pure linear-congruential step: returns state * 6364136223846793005 + 1
 * (modulo 2^64) without mixing the result. */
static inline uint64_t prng64_map1_careless(uint64_t state) {
  const uint64_t scaled = state * UINT64_C(6364136223846793005);
  return scaled + 1;
}
/* Alternative step that adds first and multiplies second: returns
 * (state + 1442695040888963407) * 6364136223846793005 (modulo 2^64). */
static inline uint64_t prng64_map2_careless(uint64_t state) {
  const uint64_t shifted = state + UINT64_C(1442695040888963407);
  return shifted * UINT64_C(6364136223846793005);
}
/* Applies prng64_map1_careless() to `state` and passes the result through
 * bleach64(). */
static inline uint64_t prng64_map1_white(uint64_t state) {
  const uint64_t raw = prng64_map1_careless(state);
  return bleach64(raw);
}
/* Applies prng64_map2_careless() to `state` and passes the result through
 * bleach64(). */
static inline uint64_t prng64_map2_white(uint64_t state) {
  const uint64_t raw = prng64_map2_careless(state);
  return bleach64(raw);
}
/* Advances `state` in place with prng64_map1_careless() and returns the new
 * state. */
static inline uint64_t prng64_careless(uint64_t &state) {
  return state = prng64_map1_careless(state);
}
/* Maps a 64-bit integer to a double in [0.0, 1.0).
 *
 * The top 52 bits of `v` become the mantissa of a double whose exponent field
 * is 0x3ff, i.e. a value in [1.0, 2.0); 1.0 is then subtracted. The low 12
 * bits of `v` are discarded.
 *
 * The bit pattern is transferred with memcpy() rather than by reading the
 * other member of a union, since reading an inactive union member is
 * undefined behaviour in C++. */
static inline double u64_to_double1(uint64_t v) {
  const uint64_t bits = UINT64_C(0x3ff) << 52 | (v >> 12);
  double result;
  memcpy(&result, &bits, sizeof(result));
  assert(result >= 1.0 && result < 2.0);
  return result - 1.0;
}
uint64_t prng64_white(uint64_t &state);
uint32_t prng32(uint64_t &state);
void prng_fill(uint64_t &state, void *ptr, size_t bytes);