diff --git a/TODO.md b/TODO.md index 0d9fd46d..d8e2d0b7 100644 --- a/TODO.md +++ b/TODO.md @@ -11,7 +11,6 @@ For the same reason ~~Github~~ is blacklisted forever. So currently most of the links are broken due to noted malicious ~~Github~~ sabotage. - - [Move most of `mdbx_chk` functional to the library API](https://libmdbx.dqdkfa.ru/dead-github/issues/204). - [Replace SRW-lock on Windows to allow shrink DB with `MDBX_NOTLS` option](https://libmdbx.dqdkfa.ru/dead-github/issues/210). - [More flexible support of asynchronous runtime/framework(s)](https://libmdbx.dqdkfa.ru/dead-github/issues/200). - [Migration guide from LMDB to MDBX](https://libmdbx.dqdkfa.ru/dead-github/issues/199). @@ -23,6 +22,7 @@ So currently most of the links are broken due to noted malicious ~~Github~~ sabo Done ---- + - [Move most of `mdbx_chk` functional to the library API](https://libmdbx.dqdkfa.ru/dead-github/issues/204). - [Simple careful mode for working with corrupted DB](https://libmdbx.dqdkfa.ru/dead-github/issues/223). - [Engage an "overlapped I/O" on Windows](https://libmdbx.dqdkfa.ru/dead-github/issues/224). - [Large/Overflow pages accounting for dirty-room](https://libmdbx.dqdkfa.ru/dead-github/issues/192). diff --git a/mdbx.h b/mdbx.h index de16ccff..c94bde3f 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2571,9 +2571,7 @@ struct MDBX_envinfo { uint64_t mi_latter_reader_txnid; /**< ID of the last reader transaction */ uint64_t mi_self_latter_reader_txnid; /**< ID of the last reader transaction of caller process */ - uint64_t mi_meta0_txnid, mi_meta0_sign; - uint64_t mi_meta1_txnid, mi_meta1_sign; - uint64_t mi_meta2_txnid, mi_meta2_sign; + uint64_t mi_meta_txnid[3], mi_meta_sign[3]; uint32_t mi_maxreaders; /**< Total reader slots in the environment */ uint32_t mi_numreaders; /**< Max reader slots used in the environment */ uint32_t mi_dxb_pagesize; /**< Database pagesize */ @@ -2590,7 +2588,7 @@ struct MDBX_envinfo { struct { struct { uint64_t x, y; - } current, meta0, meta1, meta2; + } current, meta[3]; } mi_bootid; /** Bytes not explicitly synchronized to disk */ @@ -5525,43 +5523,6 @@ mdbx_env_get_hsr(const MDBX_env *env); * \ingroup c_extra * @{ */ -/** \brief Page types for traverse the b-tree. - * \see mdbx_env_pgwalk() \see MDBX_pgvisitor_func */ -enum MDBX_page_type_t { - MDBX_page_broken, - MDBX_page_meta, - MDBX_page_large, - MDBX_page_branch, - MDBX_page_leaf, - MDBX_page_dupfixed_leaf, - MDBX_subpage_leaf, - MDBX_subpage_dupfixed_leaf, - MDBX_subpage_broken, -}; -#ifndef __cplusplus -typedef enum MDBX_page_type_t MDBX_page_type_t; -#endif - -/** \brief Pseudo-name for MainDB */ -#define MDBX_PGWALK_MAIN ((void *)((ptrdiff_t)0)) -/** \brief Pseudo-name for GarbageCollectorDB */ -#define MDBX_PGWALK_GC ((void *)((ptrdiff_t)-1)) -/** \brief Pseudo-name for MetaPages */ -#define MDBX_PGWALK_META ((void *)((ptrdiff_t)-2)) - -/** \brief Callback function for traverse the b-tree. \see mdbx_env_pgwalk() */ -typedef int -MDBX_pgvisitor_func(const uint64_t pgno, const unsigned number, void *const ctx, - const int deep, const MDBX_val *dbi_name, - const size_t page_size, const MDBX_page_type_t type, - const MDBX_error_t err, const size_t nentries, - const size_t payload_bytes, const size_t header_bytes, - const size_t unused_bytes) MDBX_CXX17_NOEXCEPT; - -/** \brief B-tree traversal function. */ -LIBMDBX_API int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor, - void *ctx, bool dont_check_keys_ordering); - /** \brief Acquires write-transaction lock. * Provided for custom and/or complex locking scenarios. * \returns A non-zero error value on failure and 0 on success. */ @@ -5718,6 +5679,14 @@ struct MDBX_chk_histogram { * \see mdbx_env_chk() */ typedef struct MDBX_chk_subdb { MDBX_chk_user_subdb_cookie_t *cookie; + +/** \brief Pseudo-name for MainDB */ +#define MDBX_CHK_MAIN ((void *)((ptrdiff_t)0)) +/** \brief Pseudo-name for GarbageCollectorDB */ +#define MDBX_CHK_GC ((void *)((ptrdiff_t)-1)) +/** \brief Pseudo-name for MetaPages */ +#define MDBX_CHK_META ((void *)((ptrdiff_t)-2)) + MDBX_val name; MDBX_db_flags_t flags; int id; @@ -5749,7 +5718,7 @@ typedef struct MDBX_chk_context { MDBX_env *env; MDBX_txn *txn; MDBX_chk_scope_t *scope; - unsigned scope_nesting; + uint8_t scope_nesting; struct { size_t total_payload_bytes; size_t subdb_total, subdb_processed; @@ -5776,7 +5745,7 @@ typedef struct MDBX_chk_callbacks { void (*scope_pop)(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *outer, MDBX_chk_scope_t *inner); void (*issue)(MDBX_chk_context_t *ctx, const char *object, - size_t entry_number, const char *issue, const char *extra_fmt, + uint64_t entry_number, const char *issue, const char *extra_fmt, va_list extra_args); MDBX_chk_user_subdb_cookie_t *(*subdb_filter)(MDBX_chk_context_t *ctx, const MDBX_val *name, @@ -5792,16 +5761,14 @@ typedef struct MDBX_chk_callbacks { int (*stage_begin)(MDBX_chk_context_t *ctx, enum MDBX_chk_stage); int (*stage_end)(MDBX_chk_context_t *ctx, enum MDBX_chk_stage, int err); - struct { - MDBX_chk_line_t *(*begin)(MDBX_chk_context_t *ctx, - enum MDBX_chk_severity severity); - void (*flush)(MDBX_chk_line_t *); - void (*done)(MDBX_chk_line_t *); - void (*chars)(MDBX_chk_line_t *, const char *str, size_t len); - void (*format)(MDBX_chk_line_t *, const char *fmt, va_list args); - void (*size)(MDBX_chk_line_t *, const char *prefix, const uint64_t value, - const char *suffix); - } print; + MDBX_chk_line_t *(*print_begin)(MDBX_chk_context_t *ctx, + enum MDBX_chk_severity severity); + void (*print_flush)(MDBX_chk_line_t *); + void (*print_done)(MDBX_chk_line_t *); + void (*print_chars)(MDBX_chk_line_t *, const char *str, size_t len); + void (*print_format)(MDBX_chk_line_t *, const char *fmt, va_list args); + void (*print_size)(MDBX_chk_line_t *, const char *prefix, + const uint64_t value, const char *suffix); } MDBX_chk_callbacks_t; /** FIXME */ diff --git a/src/base.h b/src/base.h index b8a243e8..fd730945 100644 --- a/src/base.h +++ b/src/base.h @@ -48,6 +48,7 @@ #include #include +#include #include #include #include diff --git a/src/core.c b/src/core.c index d0cb0914..fec25bed 100644 --- a/src/core.c +++ b/src/core.c @@ -5572,7 +5572,7 @@ __cold static void meta_troika_dump(const MDBX_env *env, const meta_ptr_t recent = meta_recent(env, troika); const meta_ptr_t prefer_steady = meta_prefer_steady(env, troika); const meta_ptr_t tail = meta_tail(env, troika); - NOTICE("%" PRIaTXN ".%c:%" PRIaTXN ".%c:%" PRIaTXN ".%c, fsm=0x%02x, " + NOTICE("troika: %" PRIaTXN ".%c:%" PRIaTXN ".%c:%" PRIaTXN ".%c, fsm=0x%02x, " "head=%d-%" PRIaTXN ".%c, " "base=%d-%" PRIaTXN ".%c, " "tail=%d-%" PRIaTXN ".%c, " @@ -12143,6 +12143,10 @@ static __always_inline bool eq_fast(const MDBX_val *a, const MDBX_val *b) { eq_fast_slowpath(a->iov_base, b->iov_base, a->iov_len); } +static int cmp_equal_or_greater(const MDBX_val *a, const MDBX_val *b) { + return eq_fast(a, b) ? 0 : 1; +} + static int validate_meta(MDBX_env *env, MDBX_meta *const meta, const MDBX_page *const page, const unsigned meta_number, unsigned *guess_pagesize) { @@ -22247,9 +22251,9 @@ __cold int mdbx_dbi_dupsort_depthmask(MDBX_txn *txn, MDBX_dbi dbi, return (rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc; } -__cold static int fetch_envinfo_ex(const MDBX_env *env, const MDBX_txn *txn, - MDBX_envinfo *arg, const size_t bytes) { - +__cold static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, + MDBX_envinfo *out, const size_t bytes, + meta_troika_t *const troika) { const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); @@ -22259,18 +22263,18 @@ __cold static int fetch_envinfo_ex(const MDBX_env *env, const MDBX_txn *txn, /* environment not yet opened */ #if 1 /* default behavior: returns the available info but zeroed the rest */ - memset(arg, 0, bytes); - arg->mi_geo.lower = env->me_dbgeo.lower; - arg->mi_geo.upper = env->me_dbgeo.upper; - arg->mi_geo.shrink = env->me_dbgeo.shrink; - arg->mi_geo.grow = env->me_dbgeo.grow; - arg->mi_geo.current = env->me_dbgeo.now; - arg->mi_maxreaders = env->me_maxreaders; - arg->mi_dxb_pagesize = env->me_psize; - arg->mi_sys_pagesize = env->me_os_psize; + memset(out, 0, bytes); + out->mi_geo.lower = env->me_dbgeo.lower; + out->mi_geo.upper = env->me_dbgeo.upper; + out->mi_geo.shrink = env->me_dbgeo.shrink; + out->mi_geo.grow = env->me_dbgeo.grow; + out->mi_geo.current = env->me_dbgeo.now; + out->mi_maxreaders = env->me_maxreaders; + out->mi_dxb_pagesize = env->me_psize; + out->mi_sys_pagesize = env->me_os_psize; if (likely(bytes > size_before_bootid)) { - arg->mi_bootid.current.x = bootid.x; - arg->mi_bootid.current.y = bootid.y; + out->mi_bootid.current.x = bootid.x; + out->mi_bootid.current.y = bootid.y; } return MDBX_SUCCESS; #else @@ -22285,123 +22289,119 @@ __cold static int fetch_envinfo_ex(const MDBX_env *env, const MDBX_txn *txn, if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) return MDBX_PANIC; - meta_troika_t holder; - meta_troika_t const *troika; if (txn && !(txn->mt_flags & MDBX_TXN_RDONLY)) - troika = &txn->tw.troika; - else { - holder = meta_tap(env); - troika = &holder; - } + *troika = txn->tw.troika; + else + *troika = meta_tap(env); const meta_ptr_t head = meta_recent(env, troika); - arg->mi_recent_txnid = head.txnid; - arg->mi_meta0_txnid = troika->txnid[0]; - arg->mi_meta0_sign = unaligned_peek_u64(4, meta0->mm_sign); - arg->mi_meta1_txnid = troika->txnid[1]; - arg->mi_meta1_sign = unaligned_peek_u64(4, meta1->mm_sign); - arg->mi_meta2_txnid = troika->txnid[2]; - arg->mi_meta2_sign = unaligned_peek_u64(4, meta2->mm_sign); + out->mi_recent_txnid = head.txnid; + out->mi_meta_txnid[0] = troika->txnid[0]; + out->mi_meta_sign[0] = unaligned_peek_u64(4, meta0->mm_sign); + out->mi_meta_txnid[1] = troika->txnid[1]; + out->mi_meta_sign[1] = unaligned_peek_u64(4, meta1->mm_sign); + out->mi_meta_txnid[2] = troika->txnid[2]; + out->mi_meta_sign[2] = unaligned_peek_u64(4, meta2->mm_sign); if (likely(bytes > size_before_bootid)) { - memcpy(&arg->mi_bootid.meta0, &meta0->mm_bootid, 16); - memcpy(&arg->mi_bootid.meta1, &meta1->mm_bootid, 16); - memcpy(&arg->mi_bootid.meta2, &meta2->mm_bootid, 16); + memcpy(&out->mi_bootid.meta[0], &meta0->mm_bootid, 16); + memcpy(&out->mi_bootid.meta[1], &meta1->mm_bootid, 16); + memcpy(&out->mi_bootid.meta[2], &meta2->mm_bootid, 16); } const volatile MDBX_meta *txn_meta = head.ptr_v; - arg->mi_last_pgno = txn_meta->mm_geo.next - 1; - arg->mi_geo.current = pgno2bytes(env, txn_meta->mm_geo.now); + out->mi_last_pgno = txn_meta->mm_geo.next - 1; + out->mi_geo.current = pgno2bytes(env, txn_meta->mm_geo.now); if (txn) { - arg->mi_last_pgno = txn->mt_next_pgno - 1; - arg->mi_geo.current = pgno2bytes(env, txn->mt_end_pgno); + out->mi_last_pgno = txn->mt_next_pgno - 1; + out->mi_geo.current = pgno2bytes(env, txn->mt_end_pgno); const txnid_t wanna_meta_txnid = (txn->mt_flags & MDBX_TXN_RDONLY) ? txn->mt_txnid : txn->mt_txnid - xMDBX_TXNID_STEP; - txn_meta = (arg->mi_meta0_txnid == wanna_meta_txnid) ? meta0 : txn_meta; - txn_meta = (arg->mi_meta1_txnid == wanna_meta_txnid) ? meta1 : txn_meta; - txn_meta = (arg->mi_meta2_txnid == wanna_meta_txnid) ? meta2 : txn_meta; + txn_meta = (out->mi_meta_txnid[0] == wanna_meta_txnid) ? meta0 : txn_meta; + txn_meta = (out->mi_meta_txnid[1] == wanna_meta_txnid) ? meta1 : txn_meta; + txn_meta = (out->mi_meta_txnid[2] == wanna_meta_txnid) ? meta2 : txn_meta; } - arg->mi_geo.lower = pgno2bytes(env, txn_meta->mm_geo.lower); - arg->mi_geo.upper = pgno2bytes(env, txn_meta->mm_geo.upper); - arg->mi_geo.shrink = pgno2bytes(env, pv2pages(txn_meta->mm_geo.shrink_pv)); - arg->mi_geo.grow = pgno2bytes(env, pv2pages(txn_meta->mm_geo.grow_pv)); + out->mi_geo.lower = pgno2bytes(env, txn_meta->mm_geo.lower); + out->mi_geo.upper = pgno2bytes(env, txn_meta->mm_geo.upper); + out->mi_geo.shrink = pgno2bytes(env, pv2pages(txn_meta->mm_geo.shrink_pv)); + out->mi_geo.grow = pgno2bytes(env, pv2pages(txn_meta->mm_geo.grow_pv)); const uint64_t unsynced_pages = atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed) + (atomic_load32(&env->me_lck->mti_meta_sync_txnid, mo_Relaxed) != - (uint32_t)arg->mi_recent_txnid); + (uint32_t)out->mi_recent_txnid); - arg->mi_mapsize = env->me_dxb_mmap.limit; + out->mi_mapsize = env->me_dxb_mmap.limit; const MDBX_lockinfo *const lck = env->me_lck; - arg->mi_maxreaders = env->me_maxreaders; - arg->mi_numreaders = env->me_lck_mmap.lck + out->mi_maxreaders = env->me_maxreaders; + out->mi_numreaders = env->me_lck_mmap.lck ? atomic_load32(&lck->mti_numreaders, mo_Relaxed) : INT32_MAX; - arg->mi_dxb_pagesize = env->me_psize; - arg->mi_sys_pagesize = env->me_os_psize; + out->mi_dxb_pagesize = env->me_psize; + out->mi_sys_pagesize = env->me_os_psize; if (likely(bytes > size_before_bootid)) { - arg->mi_unsync_volume = pgno2bytes(env, (size_t)unsynced_pages); + out->mi_unsync_volume = pgno2bytes(env, (size_t)unsynced_pages); const uint64_t monotime_now = osal_monotime(); uint64_t ts = atomic_load64(&lck->mti_eoos_timestamp, mo_Relaxed); - arg->mi_since_sync_seconds16dot16 = + out->mi_since_sync_seconds16dot16 = ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0; ts = atomic_load64(&lck->mti_reader_check_timestamp, mo_Relaxed); - arg->mi_since_reader_check_seconds16dot16 = + out->mi_since_reader_check_seconds16dot16 = ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0; - arg->mi_autosync_threshold = pgno2bytes( + out->mi_autosync_threshold = pgno2bytes( env, atomic_load32(&lck->mti_autosync_threshold, mo_Relaxed)); - arg->mi_autosync_period_seconds16dot16 = + out->mi_autosync_period_seconds16dot16 = osal_monotime_to_16dot16_noUnderflow( atomic_load64(&lck->mti_autosync_period, mo_Relaxed)); - arg->mi_bootid.current.x = bootid.x; - arg->mi_bootid.current.y = bootid.y; - arg->mi_mode = env->me_lck_mmap.lck ? lck->mti_envmode.weak : env->me_flags; + out->mi_bootid.current.x = bootid.x; + out->mi_bootid.current.y = bootid.y; + out->mi_mode = env->me_lck_mmap.lck ? lck->mti_envmode.weak : env->me_flags; } if (likely(bytes > size_before_pgop_stat)) { #if MDBX_ENABLE_PGOP_STAT - arg->mi_pgop_stat.newly = + out->mi_pgop_stat.newly = atomic_load64(&lck->mti_pgop_stat.newly, mo_Relaxed); - arg->mi_pgop_stat.cow = atomic_load64(&lck->mti_pgop_stat.cow, mo_Relaxed); - arg->mi_pgop_stat.clone = + out->mi_pgop_stat.cow = atomic_load64(&lck->mti_pgop_stat.cow, mo_Relaxed); + out->mi_pgop_stat.clone = atomic_load64(&lck->mti_pgop_stat.clone, mo_Relaxed); - arg->mi_pgop_stat.split = + out->mi_pgop_stat.split = atomic_load64(&lck->mti_pgop_stat.split, mo_Relaxed); - arg->mi_pgop_stat.merge = + out->mi_pgop_stat.merge = atomic_load64(&lck->mti_pgop_stat.merge, mo_Relaxed); - arg->mi_pgop_stat.spill = + out->mi_pgop_stat.spill = atomic_load64(&lck->mti_pgop_stat.spill, mo_Relaxed); - arg->mi_pgop_stat.unspill = + out->mi_pgop_stat.unspill = atomic_load64(&lck->mti_pgop_stat.unspill, mo_Relaxed); - arg->mi_pgop_stat.wops = + out->mi_pgop_stat.wops = atomic_load64(&lck->mti_pgop_stat.wops, mo_Relaxed); - arg->mi_pgop_stat.prefault = + out->mi_pgop_stat.prefault = atomic_load64(&lck->mti_pgop_stat.prefault, mo_Relaxed); - arg->mi_pgop_stat.mincore = + out->mi_pgop_stat.mincore = atomic_load64(&lck->mti_pgop_stat.mincore, mo_Relaxed); - arg->mi_pgop_stat.msync = + out->mi_pgop_stat.msync = atomic_load64(&lck->mti_pgop_stat.msync, mo_Relaxed); - arg->mi_pgop_stat.fsync = + out->mi_pgop_stat.fsync = atomic_load64(&lck->mti_pgop_stat.fsync, mo_Relaxed); #else memset(&arg->mi_pgop_stat, 0, sizeof(arg->mi_pgop_stat)); #endif /* MDBX_ENABLE_PGOP_STAT*/ } - arg->mi_self_latter_reader_txnid = arg->mi_latter_reader_txnid = - arg->mi_recent_txnid; + out->mi_self_latter_reader_txnid = out->mi_latter_reader_txnid = + out->mi_recent_txnid; if (env->me_lck_mmap.lck) { - for (size_t i = 0; i < arg->mi_numreaders; ++i) { + for (size_t i = 0; i < out->mi_numreaders; ++i) { const uint32_t pid = atomic_load32(&lck->mti_readers[i].mr_pid, mo_AcquireRelease); if (pid) { const txnid_t txnid = safe64_read(&lck->mti_readers[i].mr_txnid); - if (arg->mi_latter_reader_txnid > txnid) - arg->mi_latter_reader_txnid = txnid; - if (pid == env->me_pid && arg->mi_self_latter_reader_txnid > txnid) - arg->mi_self_latter_reader_txnid = txnid; + if (out->mi_latter_reader_txnid > txnid) + out->mi_latter_reader_txnid = txnid; + if (pid == env->me_pid && out->mi_self_latter_reader_txnid > txnid) + out->mi_self_latter_reader_txnid = txnid; } } } @@ -22410,6 +22410,26 @@ __cold static int fetch_envinfo_ex(const MDBX_env *env, const MDBX_txn *txn, return MDBX_SUCCESS; } +__cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, + size_t bytes, meta_troika_t *troika) { + MDBX_envinfo snap; + int rc = env_info_snap(env, txn, &snap, sizeof(snap), troika); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + while (1) { + rc = env_info_snap(env, txn, out, bytes, troika); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + snap.mi_since_sync_seconds16dot16 = out->mi_since_sync_seconds16dot16; + snap.mi_since_reader_check_seconds16dot16 = + out->mi_since_reader_check_seconds16dot16; + if (likely(memcmp(&snap, out, bytes) == 0)) + return MDBX_SUCCESS; + memcpy(&snap, out, bytes); + } +} + __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *arg, size_t bytes) { if (unlikely((env == NULL && txn == NULL) || arg == NULL)) @@ -22436,22 +22456,8 @@ __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, bytes != size_before_pgop_stat) return MDBX_EINVAL; - MDBX_envinfo snap; - int rc = fetch_envinfo_ex(env, txn, &snap, sizeof(snap)); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - while (1) { - rc = fetch_envinfo_ex(env, txn, arg, bytes); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - snap.mi_since_sync_seconds16dot16 = arg->mi_since_sync_seconds16dot16; - snap.mi_since_reader_check_seconds16dot16 = - arg->mi_since_reader_check_seconds16dot16; - if (likely(memcmp(&snap, arg, bytes) == 0)) - return MDBX_SUCCESS; - memcpy(&snap, arg, bytes); - } + meta_troika_t troika; + return env_info(env, txn, arg, bytes, &troika); } static __inline MDBX_cmp_func *get_default_keycmp(MDBX_db_flags_t flags) { @@ -22572,23 +22578,21 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, } /* main table? */ - if (table_name == MDBX_PGWALK_MAIN || - table_name->iov_base == MDBX_PGWALK_MAIN) { + if (table_name == MDBX_CHK_MAIN || table_name->iov_base == MDBX_CHK_MAIN) { rc = dbi_bind(txn, MAIN_DBI, user_flags, keycmp, datacmp); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; *dbi = MAIN_DBI; return rc; } - if (table_name == MDBX_PGWALK_GC || table_name->iov_base == MDBX_PGWALK_GC) { + if (table_name == MDBX_CHK_GC || table_name->iov_base == MDBX_CHK_GC) { rc = dbi_bind(txn, FREE_DBI, user_flags, keycmp, datacmp); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; *dbi = FREE_DBI; return rc; } - if (table_name == MDBX_PGWALK_META || - table_name->iov_base == MDBX_PGWALK_META) { + if (table_name == MDBX_CHK_META || table_name->iov_base == MDBX_CHK_META) { rc = MDBX_EINVAL; goto bailout; } @@ -22781,8 +22785,8 @@ static int dbi_open_cstr(MDBX_txn *txn, const char *name_cstr, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { MDBX_val thunk, *name; - if (name_cstr == MDBX_PGWALK_MAIN || name_cstr == MDBX_PGWALK_GC || - name_cstr == MDBX_PGWALK_META) + if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || + name_cstr == MDBX_CHK_META) name = (void *)name_cstr; else { thunk.iov_len = strlen(name_cstr); @@ -23457,12 +23461,12 @@ typedef struct mdbx_walk_ctx { bool mw_dont_check_keys_ordering; } mdbx_walk_ctx_t; -__cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const sdb, - const MDBX_val *name, int deep); +__cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_walk_sdb_t *sdb, + int deep); static MDBX_page_type_t walk_page_type(const MDBX_page *mp) { if (mp) - switch (mp->mp_flags) { + switch (mp->mp_flags & ~P_SPILLED) { case P_BRANCH: return MDBX_page_branch; case P_LEAF: @@ -23471,15 +23475,13 @@ static MDBX_page_type_t walk_page_type(const MDBX_page *mp) { return MDBX_page_dupfixed_leaf; case P_OVERFLOW: return MDBX_page_large; - case P_META: - return MDBX_page_meta; } return MDBX_page_broken; } /* Depth-first tree traversal. */ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, - const MDBX_val *name, int deep, + MDBX_walk_sdb_t *sdb, int deep, txnid_t parent_txnid) { assert(pgno != P_INVALID); MDBX_page *mp = nullptr; @@ -23536,7 +23538,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, pagesize = pgno2bytes(ctx->mw_txn->mt_env, npages); const size_t over_unused = pagesize - over_payload - over_header; const int rc = ctx->mw_visitor(large_pgno, npages, ctx->mw_user, deep, - name, pagesize, MDBX_page_large, err, 1, + sdb, pagesize, MDBX_page_large, err, 1, over_payload, over_header, over_unused); if (unlikely(rc != MDBX_SUCCESS)) return (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc; @@ -23606,7 +23608,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, } const int rc = - ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, name, node_ds(node), + ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, sdb, node_ds(node), subtype, err, nsubkeys, subpayload_size, subheader_size, subunused_size + subalign_bytes); if (unlikely(rc != MDBX_SUCCESS)) @@ -23624,7 +23626,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, } const int rc = ctx->mw_visitor( - pgno, 1, ctx->mw_user, deep, name, ctx->mw_txn->mt_env->me_psize, type, + pgno, 1, ctx->mw_user, deep, sdb, ctx->mw_txn->mt_env->me_psize, type, err, nentries, payload_size, header_size, unused_size + align_bytes); if (unlikely(rc != MDBX_SUCCESS)) return (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc; @@ -23636,7 +23638,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, MDBX_node *node = page_node(mp, i); if (type == MDBX_page_branch) { assert(err == MDBX_SUCCESS); - err = walk_tree(ctx, node_pgno(node), name, deep + 1, mp->mp_txnid); + err = walk_tree(ctx, node_pgno(node), sdb, deep + 1, mp->mp_txnid); if (unlikely(err != MDBX_SUCCESS)) { if (err == MDBX_RESULT_TRUE) break; @@ -23655,11 +23657,13 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } else { - MDBX_db db; - memcpy(&db, node_data(node), sizeof(db)); - const MDBX_val subdb_name = {node_key(node), node_ks(node)}; + MDBX_db aligned_db; + memcpy(&aligned_db, node_data(node), sizeof(aligned_db)); + MDBX_walk_sdb_t sdb_info = { + {node_key(node), node_ks(node)}, nullptr, nullptr}; + sdb_info.internal = &aligned_db; assert(err == MDBX_SUCCESS); - err = walk_sdb(ctx, &db, &subdb_name, deep + 1); + err = walk_sdb(ctx, &sdb_info, deep + 1); } break; @@ -23669,15 +23673,17 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } else { - MDBX_db db; - memcpy(&db, node_data(node), sizeof(db)); + MDBX_db aligned_db; + memcpy(&aligned_db, node_data(node), sizeof(aligned_db)); assert(ctx->mw_cursor->mc_xcursor == &container_of(ctx->mw_cursor, MDBX_cursor_couple, outer)->inner); assert(err == MDBX_SUCCESS); err = cursor_xinit1(ctx->mw_cursor, node, mp); if (likely(err == MDBX_SUCCESS)) { ctx->mw_cursor = &ctx->mw_cursor->mc_xcursor->mx_cursor; - err = walk_tree(ctx, db.md_root, name, deep + 1, mp->mp_txnid); + sdb->nested = &aligned_db; + err = walk_tree(ctx, aligned_db.md_root, sdb, deep + 1, mp->mp_txnid); + sdb->nested = nullptr; MDBX_xcursor *inner_xcursor = container_of(ctx->mw_cursor, MDBX_xcursor, mx_cursor); MDBX_cursor_couple *couple = @@ -23692,15 +23698,16 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, return MDBX_SUCCESS; } -__cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const sdb, - const MDBX_val *name, int deep) { - if (unlikely(sdb->md_root == P_INVALID)) +__cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_walk_sdb_t *sdb, + int deep) { + struct MDBX_db *const db = sdb->internal; + if (unlikely(db->md_root == P_INVALID)) return MDBX_SUCCESS; /* empty db */ MDBX_cursor_couple couple; MDBX_dbx dbx = {.md_klen_min = INT_MAX}; uint8_t dbistate = DBI_VALID | DBI_AUDITED; - int rc = couple_init(&couple, ~0u, ctx->mw_txn, sdb, &dbx, &dbistate); + int rc = couple_init(&couple, ~0u, ctx->mw_txn, db, &dbx, &dbistate); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -23712,8 +23719,8 @@ __cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const sdb, : CC_PAGECHECK; couple.outer.mc_next = ctx->mw_cursor; ctx->mw_cursor = &couple.outer; - rc = walk_tree(ctx, sdb->md_root, name, deep, - sdb->md_mod_txnid ? sdb->md_mod_txnid : ctx->mw_txn->mt_txnid); + rc = walk_tree(ctx, db->md_root, sdb, deep, + db->md_mod_txnid ? db->md_mod_txnid : ctx->mw_txn->mt_txnid); ctx->mw_cursor = couple.outer.mc_next; return rc; } @@ -23731,15 +23738,13 @@ __cold int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor, ctx.mw_visitor = visitor; ctx.mw_dont_check_keys_ordering = dont_check_keys_ordering; - rc = visitor(0, NUM_METAS, user, 0, MDBX_PGWALK_META, - pgno2bytes(txn->mt_env, NUM_METAS), MDBX_page_meta, MDBX_SUCCESS, - NUM_METAS, sizeof(MDBX_meta) * NUM_METAS, PAGEHDRSZ * NUM_METAS, - (txn->mt_env->me_psize - sizeof(MDBX_meta) - PAGEHDRSZ) * - NUM_METAS); - if (!MDBX_IS_ERROR(rc)) - rc = walk_sdb(&ctx, &txn->mt_dbs[FREE_DBI], MDBX_PGWALK_GC, 0); - if (!MDBX_IS_ERROR(rc)) - rc = walk_sdb(&ctx, &txn->mt_dbs[MAIN_DBI], MDBX_PGWALK_MAIN, 0); + MDBX_walk_sdb_t sdb = {{MDBX_CHK_GC, 0}, &txn->mt_dbs[FREE_DBI], nullptr}; + rc = walk_sdb(&ctx, &sdb, 0); + if (!MDBX_IS_ERROR(rc)) { + sdb.name.iov_base = MDBX_CHK_MAIN; + sdb.internal = &txn->mt_dbs[MAIN_DBI]; + rc = walk_sdb(&ctx, &sdb, 0); + } return rc; } @@ -25520,6 +25525,2079 @@ int mdbx_txn_unlock(MDBX_env *env) { return MDBX_SUCCESS; } +/******************************************************************************* + * Checking API */ + +typedef struct MDBX_chk_internal { + MDBX_chk_context_t *usr; + const struct MDBX_chk_callbacks *cb; + uint64_t monotime_timeout; + + size_t *problem_counter; + uint8_t flags; + bool got_break; + bool write_locked; + uint8_t scope_depth; + + MDBX_chk_subdb_t subdb_gc, subdb_main; + int16_t *pagemap; + MDBX_chk_subdb_t *last_lookup; + const void *last_nested; + MDBX_chk_scope_t scope_stack[12]; + MDBX_chk_subdb_t *subdb[MDBX_MAX_DBI + CORE_DBS]; + + MDBX_envinfo envinfo; + meta_troika_t troika; + MDBX_val v2a_buf; +} MDBX_chk_internal_t; + +__cold static int chk_check_break(MDBX_chk_scope_t *const scope) { + MDBX_chk_internal_t *const chk = scope->internal; + return (chk->got_break || (chk->cb->check_break && + (chk->got_break = chk->cb->check_break(chk->usr)))) + ? MDBX_RESULT_TRUE + : MDBX_RESULT_FALSE; +} + +__cold static void chk_line_end(MDBX_chk_line_t *line) { + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (likely(chk->cb->print_done)) + chk->cb->print_done(line); + } +} + +__cold __must_check_result static MDBX_chk_line_t * +chk_line_begin(MDBX_chk_scope_t *const scope, enum MDBX_chk_severity severity) { + MDBX_chk_internal_t *const chk = scope->internal; + if (severity < MDBX_chk_warning) + mdbx_env_chk_problem(chk->usr); + MDBX_chk_line_t *line = nullptr; + if (likely(chk->cb->print_begin)) { + line = chk->cb->print_begin(chk->usr, severity); + if (likely(line)) { + assert(line->ctx == nullptr || (line->ctx == chk->usr && line->empty)); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + line->ctx = chk->usr; + } + } + return line; +} + +__cold static MDBX_chk_line_t *chk_line_feed(MDBX_chk_line_t *line) { + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + enum MDBX_chk_severity severity = line->severity; + chk_line_end(line); + line = chk_line_begin(chk->usr->scope, severity); + } + return line; +} + +__cold static MDBX_chk_line_t *chk_flush(MDBX_chk_line_t *line) { + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (likely(chk->cb->print_flush)) { + chk->cb->print_flush(line); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + line->out = line->begin; + } + } + return line; +} + +__cold static size_t chk_print_wanna(MDBX_chk_line_t *line, size_t need) { + if (likely(line && need)) { + size_t have = line->end - line->out; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (need > have) { + line = chk_flush(line); + have = line->end - line->out; + } + return (need < have) ? need : have; + } + return 0; +} + +__cold static MDBX_chk_line_t *chk_puts(MDBX_chk_line_t *line, + const char *str) { + if (likely(line && str && *str)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + size_t left = strlen(str); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (chk->cb->print_chars) { + chk->cb->print_chars(line, str, left); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + } else + do { + size_t chunk = chk_print_wanna(line, left); + assert(chunk <= left); + if (unlikely(!chunk)) + break; + memcpy(line->out, str, chunk); + line->out += chunk; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + str += chunk; + left -= chunk; + } while (left); + line->empty = false; + } + return line; +} + +__cold static MDBX_chk_line_t *chk_print_va(MDBX_chk_line_t *line, + const char *fmt, va_list args) { + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (chk->cb->print_format) { + chk->cb->print_format(line, fmt, args); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + } else { + va_list ones; + va_copy(ones, args); + const int needed = vsnprintf(nullptr, 0, fmt, ones); + va_end(ones); + if (likely(needed > 0)) { + const size_t have = chk_print_wanna(line, needed); + if (likely(have > 0)) { + int written = vsnprintf(line->out, have, fmt, args); + if (likely(written > 0)) + line->out += written; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + } + } + } + line->empty = false; + } + return line; +} + +__cold static MDBX_chk_line_t *MDBX_PRINTF_ARGS(2, 3) + chk_print(MDBX_chk_line_t *line, const char *fmt, ...) { + if (likely(line)) { + // MDBX_chk_internal_t *chk = line->ctx->internal; + va_list args; + va_start(args, fmt); + line = chk_print_va(line, fmt, args); + va_end(args); + line->empty = false; + } + return line; +} + +__cold static MDBX_chk_line_t *chk_print_size(MDBX_chk_line_t *line, + const char *prefix, + const uint64_t value, + const char *suffix) { + static const char sf[] = + "KMGTPEZY"; /* LY: Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta! */ + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + prefix = prefix ? prefix : ""; + suffix = suffix ? suffix : ""; + if (chk->cb->print_size) + chk->cb->print_size(line, prefix, value, suffix); + else + for (unsigned i = 0;; ++i) { + const unsigned scale = 10 + i * 10; + const uint64_t rounded = value + (UINT64_C(5) << (scale - 10)); + const uint64_t integer = rounded >> scale; + const uint64_t fractional = + (rounded - (integer << scale)) * 100u >> scale; + if ((rounded >> scale) <= 1000) + return chk_print(line, "%s%" PRIu64 " (%u.%02u %ciB)%s", prefix, + value, (unsigned)integer, (unsigned)fractional, + sf[i], suffix); + } + line->empty = false; + } + return line; +} + +__cold static int chk_error_rc(MDBX_chk_scope_t *const scope, int err, + const char *subj) { + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error); + if (line) + chk_line_end(chk_flush(chk_print(line, "%s() failed, error %s (%d)", subj, + mdbx_strerror(err), err))); + else + debug_log(MDBX_LOG_ERROR, "mdbx_env_chk", 0, "%s() failed, error %s (%d)", + subj, mdbx_strerror(err), err); + return err; +} + +__cold static void MDBX_PRINTF_ARGS(5, 6) + chk_object_issue(MDBX_chk_scope_t *const scope, const char *object, + uint64_t entry_number, const char *caption, + const char *extra_fmt, ...) { + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_issue_t *issue = chk->usr->scope->issues; + while (issue) { + if (issue->caption == caption) { + issue->count += 1; + break; + } else + issue = issue->next; + } + const bool fresh = issue == nullptr; + if (fresh) { + issue = osal_malloc(sizeof(*issue)); + if (likely(issue)) { + issue->caption = caption; + issue->count = 1; + issue->next = chk->usr->scope->issues; + chk->usr->scope->issues = issue; + } else + chk_error_rc(scope, ENOMEM, "adding issue"); + } + + va_list args; + va_start(args, extra_fmt); + if (chk->cb->issue) { + mdbx_env_chk_problem(chk->usr); + chk->cb->issue(chk->usr, object, entry_number, caption, extra_fmt, args); + } else { + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error); + if (entry_number != UINT64_MAX) + chk_print(line, "%s #%" PRIu64 ": %s", object, entry_number, caption); + else + chk_print(line, "%s: %s", object, caption); + if (extra_fmt) + chk_puts(chk_print_va(chk_puts(line, " ("), extra_fmt, args), ")"); + chk_line_end(fresh ? chk_flush(line) : line); + } + va_end(args); +} + +__cold static void MDBX_PRINTF_ARGS(2, 3) + chk_scope_issue(MDBX_chk_scope_t *const scope, const char *fmt, ...) { + MDBX_chk_internal_t *const chk = scope->internal; + va_list args; + va_start(args, fmt); + if (likely(chk->cb->issue)) { + mdbx_env_chk_problem(chk->usr); + chk->cb->issue(chk->usr, nullptr, 0, nullptr, fmt, args); + } else + chk_line_end( + chk_print_va(chk_line_begin(scope, MDBX_chk_error), fmt, args)); + va_end(args); +} + +__cold static int chk_scope_end(MDBX_chk_internal_t *chk, int err) { + assert(chk->scope_depth > 0); + MDBX_chk_scope_t *const inner = chk->scope_stack + chk->scope_depth; + MDBX_chk_scope_t *const outer = chk->scope_depth ? inner - 1 : nullptr; + if (!outer || outer->stage != inner->stage) { + if (err == MDBX_SUCCESS && *chk->problem_counter) + err = MDBX_PROBLEM; + else if (*chk->problem_counter == 0 && MDBX_IS_ERROR(err)) + *chk->problem_counter = 1; + if (chk->problem_counter != &chk->usr->result.total_problems) { + chk->usr->result.total_problems += *chk->problem_counter; + chk->problem_counter = &chk->usr->result.total_problems; + } + if (chk->cb->stage_end) + err = chk->cb->stage_end(chk->usr, inner->stage, err); + } + if (chk->cb->scope_conclude) + err = chk->cb->scope_conclude(chk->usr, outer, inner, err); + chk->usr->scope = outer; + chk->usr->scope_nesting = chk->scope_depth -= 1; + if (outer) + outer->subtotal_issues += inner->subtotal_issues; + if (chk->cb->scope_pop) + chk->cb->scope_pop(chk->usr, outer, inner); + + while (inner->issues) { + MDBX_chk_issue_t *next = inner->issues->next; + osal_free(inner->issues); + inner->issues = next; + } + memset(inner, -1, sizeof(*inner)); + return err; +} + +__cold static int chk_scope_begin_args(MDBX_chk_internal_t *chk, + int verbosity_adjustment, + enum MDBX_chk_stage stage, + const void *object, size_t *problems, + const char *fmt, va_list args) { + if (unlikely(chk->scope_depth + 1u >= ARRAY_LENGTH(chk->scope_stack))) + return MDBX_BACKLOG_DEPLETED; + + MDBX_chk_scope_t *const outer = chk->scope_stack + chk->scope_depth; + const int verbosity = + outer->verbosity + + (verbosity_adjustment - 1) * (1 << MDBX_chk_severity_prio_shift); + MDBX_chk_scope_t *const inner = outer + 1; + memset(inner, 0, sizeof(*inner)); + inner->internal = outer->internal; + inner->stage = stage ? stage : (stage = outer->stage); + inner->object = object; + inner->verbosity = (verbosity < MDBX_chk_warning) + ? MDBX_chk_warning + : (enum MDBX_chk_severity)verbosity; + if (problems) + chk->problem_counter = problems; + else if (!chk->problem_counter || outer->stage != stage) + chk->problem_counter = &chk->usr->result.total_problems; + + if (chk->cb->scope_push) { + const int err = chk->cb->scope_push(chk->usr, outer, inner, fmt, args); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + chk->usr->scope = inner; + chk->usr->scope_nesting = chk->scope_depth += 1; + + if (stage != outer->stage && chk->cb->stage_begin) { + int err = chk->cb->stage_begin(chk->usr, stage); + if (unlikely(err != MDBX_SUCCESS)) { + err = chk_scope_end(chk, err); + assert(err != MDBX_SUCCESS); + return err ? err : MDBX_RESULT_TRUE; + } + } + return MDBX_SUCCESS; +} + +__cold static int MDBX_PRINTF_ARGS(6, 7) + chk_scope_begin(MDBX_chk_internal_t *chk, int verbosity_adjustment, + enum MDBX_chk_stage stage, const void *object, + size_t *problems, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + int rc = chk_scope_begin_args(chk, verbosity_adjustment, stage, object, + problems, fmt, args); + va_end(args); + return rc; +} + +__cold static int chk_scope_restore(MDBX_chk_scope_t *const target, int err) { + MDBX_chk_internal_t *const chk = target->internal; + assert(target <= chk->usr->scope); + while (chk->usr->scope > target) + err = chk_scope_end(chk, err); + return err; +} + +__cold void chk_scope_pop(MDBX_chk_scope_t *const inner) { + if (inner && inner > inner->internal->scope_stack) + chk_scope_restore(inner - 1, MDBX_SUCCESS); +} + +__cold static MDBX_chk_scope_t *MDBX_PRINTF_ARGS(3, 4) + chk_scope_push(MDBX_chk_scope_t *const scope, int verbosity_adjustment, + const char *fmt, ...) { + chk_scope_restore(scope, MDBX_SUCCESS); + va_list args; + va_start(args, fmt); + int err = chk_scope_begin_args(scope->internal, verbosity_adjustment, + scope->stage, nullptr, nullptr, fmt, args); + va_end(args); + return err ? nullptr : scope + 1; +} + +__cold static const char *chk_v2a(MDBX_chk_internal_t *chk, + const MDBX_val *val) { + if (val == MDBX_CHK_MAIN) + return "@MAIN"; + if (val == MDBX_CHK_GC) + return "@GC"; + if (val == MDBX_CHK_META) + return "@META"; + + const unsigned char *const data = val->iov_base; + const size_t len = val->iov_len; + if (data == MDBX_CHK_MAIN) + return "@MAIN"; + if (data == MDBX_CHK_GC) + return "@GC"; + if (data == MDBX_CHK_META) + return "@META"; + + if (!len) + return ""; + if (!data) + return ""; + if (len > 65536) { + const size_t enough = 42; + if (chk->v2a_buf.iov_len < enough) { + void *ptr = osal_realloc(chk->v2a_buf.iov_base, enough); + if (unlikely(!ptr)) + return ""; + chk->v2a_buf.iov_base = ptr; + chk->v2a_buf.iov_len = enough; + } + snprintf(chk->v2a_buf.iov_base, chk->v2a_buf.iov_len, + "", len); + return chk->v2a_buf.iov_base; + } + + bool printable = true; + bool quoting = false; + size_t xchars = 0; + for (size_t i = 0; i < len && printable; ++i) { + quoting = quoting || !(data[i] == '_' || isalnum(data[i])); + printable = + isprint(data[i]) || (data[i] < ' ' && ++xchars < 4 && len > xchars * 4); + } + + size_t need = len + 1; + if (quoting || !printable) + need += len + /* quotes */ 2 + 2 * /* max xchars */ 4; + if (need > chk->v2a_buf.iov_len) { + void *ptr = osal_realloc(chk->v2a_buf.iov_base, need); + if (unlikely(!ptr)) + return ""; + chk->v2a_buf.iov_base = ptr; + chk->v2a_buf.iov_len = need; + } + + static const char hex[] = "0123456789abcdef"; + char *w = chk->v2a_buf.iov_base; + if (!quoting) { + memcpy(w, data, len); + w += len; + } else if (printable) { + *w++ = '\''; + for (size_t i = 0; i < len; ++i) { + if (data[i] < ' ') { + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 4); + w[0] = '\\'; + w[1] = 'x'; + w[2] = hex[data[i] >> 4]; + w[3] = hex[data[i] & 15]; + w += 4; + } else if (strchr("\"'`\\", data[i])) { + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 2); + w[0] = '\\'; + w[1] = data[i]; + w += 2; + } else { + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 1); + *w++ = data[i]; + } + } + *w++ = '\''; + } else { + *w++ = '\\'; + *w++ = 'x'; + for (size_t i = 0; i < len; ++i) { + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 2); + w[0] = hex[data[i] >> 4]; + w[1] = hex[data[i] & 15]; + w += 2; + } + } + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w); + *w = 0; + return chk->v2a_buf.iov_base; +} + +__cold static void chk_dispose(MDBX_chk_internal_t *chk) { + assert(chk->subdb[FREE_DBI] == &chk->subdb_gc); + assert(chk->subdb[MAIN_DBI] == &chk->subdb_main); + for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb); ++i) { + MDBX_chk_subdb_t *const sdb = chk->subdb[i]; + if (sdb) { + chk->subdb[i] = nullptr; + if (chk->cb->subdb_dispose && sdb->cookie) { + chk->cb->subdb_dispose(chk->usr, sdb); + sdb->cookie = nullptr; + } + if (sdb != &chk->subdb_gc && sdb != &chk->subdb_main) { + osal_free(sdb); + } + } + } + osal_free(chk->v2a_buf.iov_base); + osal_free(chk->pagemap); + chk->usr->internal = nullptr; + chk->usr->scope = nullptr; + chk->pagemap = nullptr; + memset(chk, 0xDD, sizeof(*chk)); + osal_free(chk); +} + +static size_t div_8s(size_t numerator, size_t divider) { + assert(numerator <= (SIZE_MAX >> 8)); + return (numerator << 8) / divider; +} + +static size_t mul_8s(size_t quotient, size_t multiplier) { + size_t hi = multiplier * (quotient >> 8); + size_t lo = multiplier * (quotient & 255) + 128; + return hi + (lo >> 8); +} + +static void histogram_reduce(struct MDBX_chk_histogram *p) { + const size_t size = ARRAY_LENGTH(p->ranges), last = size - 1; + // ищем пару для слияния с минимальной ошибкой + size_t min_err = SIZE_MAX, min_i = last - 1; + for (size_t i = 0; i < last; ++i) { + const size_t b1 = p->ranges[i].begin, e1 = p->ranges[i].end, + s1 = p->ranges[i].amount; + const size_t b2 = p->ranges[i + 1].begin, e2 = p->ranges[i + 1].end, + s2 = p->ranges[i + 1].amount; + const size_t l1 = e1 - b1, l2 = e2 - b2, lx = e2 - b1, sx = s1 + s2; + assert(s1 > 0 && b1 > 0 && b1 < e1); + assert(s2 > 0 && b2 > 0 && b2 < e2); + assert(e1 <= b2); + // за ошибку принимаем площадь изменений на гистограмме при слиянии + const size_t h1 = div_8s(s1, l1), h2 = div_8s(s2, l2), hx = div_8s(sx, lx); + const size_t d1 = mul_8s((h1 > hx) ? h1 - hx : hx - h1, l1); + const size_t d2 = mul_8s((h2 > hx) ? h2 - hx : hx - h2, l2); + const size_t dx = mul_8s(hx, b2 - e1); + const size_t err = d1 + d2 + dx; + if (min_err >= err) { + min_i = i; + min_err = err; + } + } + // объединяем + p->ranges[min_i].end = p->ranges[min_i + 1].end; + p->ranges[min_i].amount += p->ranges[min_i + 1].amount; + p->ranges[min_i].count += p->ranges[min_i + 1].count; + if (min_i < last) + // перемещаем хвост + memmove(p->ranges + min_i, p->ranges + min_i + 1, + (last - min_i) * sizeof(p->ranges[0])); + // обнуляем последний элемент и продолжаем + p->ranges[last].count = 0; +} + +static void histogram_acc(const size_t n, struct MDBX_chk_histogram *p) { + STATIC_ASSERT(ARRAY_LENGTH(p->ranges) > 2); + p->amount += n; + p->count += 1; + if (likely(n < 2)) { + p->ones += n; + p->pad += 1; + } else + for (;;) { + const size_t size = ARRAY_LENGTH(p->ranges), last = size - 1; + size_t i = 0; + while (i < size && p->ranges[i].count && n >= p->ranges[i].begin) { + if (n < p->ranges[i].end) { + // значение попадает в существующий интервал + p->ranges[i].amount += n; + p->ranges[i].count += 1; + return; + } + ++i; + } + if (p->ranges[last].count == 0) { + // использованы еще не все слоты, добавляем интервал + assert(i < size); + if (p->ranges[i].count) { + assert(i < last); + // раздвигаем + memmove(p->ranges + i + 1, p->ranges + i, + (last - i) * sizeof(p->ranges[0])); + } + p->ranges[i].begin = n; + p->ranges[i].end = n + 1; + p->ranges[i].amount = n; + p->ranges[i].count = 1; + return; + } + histogram_reduce(p); + } +} + +__cold static MDBX_chk_line_t * +histogram_dist(MDBX_chk_line_t *line, + const struct MDBX_chk_histogram *histogram, const char *prefix, + const char *first, bool amount) { + line = chk_print(line, "%s:", prefix); + const char *comma = ""; + const size_t first_val = amount ? histogram->ones : histogram->pad; + if (first_val) { + chk_print(line, " %s=%" PRIuSIZE, first, first_val); + comma = ","; + } + for (size_t n = 0; n < ARRAY_LENGTH(histogram->ranges); ++n) + if (histogram->ranges[n].count) { + chk_print(line, "%s %" PRIuSIZE, comma, histogram->ranges[n].begin); + if (histogram->ranges[n].begin != histogram->ranges[n].end - 1) + chk_print(line, "-%" PRIuSIZE, histogram->ranges[n].end - 1); + line = chk_print(line, "=%" PRIuSIZE, + amount ? histogram->ranges[n].amount + : histogram->ranges[n].count); + comma = ","; + } + return line; +} + +__cold static MDBX_chk_line_t * +histogram_print(MDBX_chk_scope_t *scope, MDBX_chk_line_t *line, + const struct MDBX_chk_histogram *histogram, const char *prefix, + const char *first, bool amount) { + if (histogram->count) { + line = chk_print(line, "%s %" PRIuSIZE, prefix, + amount ? histogram->amount : histogram->count); + if (scope->verbosity > MDBX_chk_info) + line = chk_puts( + histogram_dist(line, histogram, " (distribution", first, amount), + ")"); + } + return line; +} + +//----------------------------------------------------------------------------- + +__cold static int chk_get_sdb(MDBX_chk_scope_t *const scope, + const MDBX_walk_sdb_t *in, + MDBX_chk_subdb_t **out) { + MDBX_chk_internal_t *const chk = scope->internal; + if (chk->last_lookup && + chk->last_lookup->name.iov_base == in->name.iov_base) { + *out = chk->last_lookup; + return MDBX_SUCCESS; + } + + for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb); ++i) { + MDBX_chk_subdb_t *sdb = chk->subdb[i]; + if (!sdb) { + sdb = osal_calloc(1, sizeof(MDBX_chk_subdb_t)); + if (unlikely(!sdb)) { + *out = nullptr; + return chk_error_rc(scope, MDBX_ENOMEM, "alloc_subDB"); + } + chk->subdb[i] = sdb; + sdb->flags = in->internal->md_flags; + sdb->id = -1; + sdb->name = in->name; + } + if (sdb->name.iov_base == in->name.iov_base) { + if (sdb->id < 0) { + sdb->id = (int)i; + sdb->cookie = + chk->cb->subdb_filter + ? chk->cb->subdb_filter(chk->usr, &sdb->name, sdb->flags) + : (void *)(intptr_t)-1; + } + *out = (chk->last_lookup = sdb); + return MDBX_SUCCESS; + } + } + chk_scope_issue(scope, "too many subDBs > %u", + (unsigned)ARRAY_LENGTH(chk->subdb) - CORE_DBS - /* meta */ 1); + *out = nullptr; + return MDBX_PROBLEM; +} + +//------------------------------------------------------------------------------ + +__cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, + const unsigned num) { + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_verbose); + MDBX_chk_internal_t *const chk = scope->internal; + if (line) { + MDBX_env *const env = chk->usr->env; + const bool have_bootid = (chk->envinfo.mi_bootid.current.x | + chk->envinfo.mi_bootid.current.y) != 0; + const bool bootid_match = + have_bootid && memcmp(&chk->envinfo.mi_bootid.meta[num], + &chk->envinfo.mi_bootid.current, + sizeof(chk->envinfo.mi_bootid.current)) == 0; + + line = chk_print(line, "meta-%u: ", num); + switch (chk->envinfo.mi_meta_sign[num]) { + case MDBX_DATASIGN_NONE: + line = chk_puts(line, "no-sync/legacy"); + break; + case MDBX_DATASIGN_WEAK: + line = chk_print(line, "weak-%s", + have_bootid + ? (bootid_match ? "intact (same boot-id)" : "dead") + : "unknown (no boot-id)"); + break; + default: + line = chk_puts(line, "steady"); + break; + } + const txnid_t meta_txnid = chk->envinfo.mi_meta_txnid[num]; + line = chk_print(line, " txn#%" PRIaTXN, meta_txnid); + + const char *status = "stay"; + if (num == chk->troika.recent) + status = "head"; + else if (num == TROIKA_TAIL(&chk->troika)) + status = "tail"; + line = chk_print(line, ", %s", status); + + if (env->me_stuck_meta >= 0) { + if (num == (unsigned)env->me_stuck_meta) + line = chk_print(line, ", %s", "forced for checking"); + } else if (meta_txnid > chk->envinfo.mi_recent_txnid && + (env->me_flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == + MDBX_EXCLUSIVE) + line = chk_print(line, + ", rolled-back %" PRIu64 " commit(s) (%" PRIu64 + " >>> %" PRIu64 ")", + meta_txnid - chk->envinfo.mi_recent_txnid, meta_txnid, + chk->envinfo.mi_recent_txnid); + chk_line_end(line); + } +} + +__cold static int +chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, + const int deep, const MDBX_walk_sdb_t *sdb_info, + const size_t page_size, const MDBX_page_type_t pagetype, + const MDBX_error_t page_err, const size_t nentries, + const size_t payload_bytes, const size_t header_bytes, + const size_t unused_bytes) { + MDBX_chk_scope_t *const scope = ctx; + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + MDBX_env *const env = usr->env; + + MDBX_chk_subdb_t *sdb; + int err = chk_get_sdb(scope, sdb_info, &sdb); + if (unlikely(err)) + return err; + + if (deep > 42) { + chk_scope_issue(scope, "too deeply %u", deep); + return MDBX_CORRUPTED /* avoid infinite loop/recursion */; + } + histogram_acc(deep, &sdb->histogram.deep); + usr->result.processed_pages += npages; + const size_t page_bytes = payload_bytes + header_bytes + unused_bytes; + + int height = deep + 1; + if (sdb->id >= CORE_DBS) + height -= usr->txn->mt_dbs[MAIN_DBI].md_depth; + const struct MDBX_db *nested = sdb_info->nested; + if (nested) { + if (sdb->flags & MDBX_DUPSORT) + height -= sdb_info->internal->md_depth; + else { + chk_object_issue(scope, "nested tree", pgno, "unexpected", + "subDb %s flags 0x%x, deep %i", chk_v2a(chk, &sdb->name), + sdb->flags, deep); + nested = nullptr; + } + } else + chk->last_nested = nullptr; + + const char *pagetype_caption; + bool branch = false; + switch (pagetype) { + default: + chk_object_issue(scope, "page", pgno, "unknown page-type", + "type %u, deep %i", (unsigned)pagetype, deep); + pagetype_caption = "unknown"; + sdb->pages.other += npages; + break; + case MDBX_page_broken: + assert(page_err != MDBX_SUCCESS); + pagetype_caption = "broken"; + sdb->pages.other += npages; + break; + case MDBX_subpage_broken: + assert(page_err != MDBX_SUCCESS); + pagetype_caption = "broken-subpage"; + sdb->pages.other += npages; + break; + case MDBX_page_large: + pagetype_caption = "large"; + histogram_acc(npages, &sdb->histogram.large_pages); + if (sdb->flags & MDBX_DUPSORT) + chk_object_issue(scope, "page", pgno, "unexpected", + "type %u, subDb %s flags 0x%x, deep %i", + (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, + deep); + break; + case MDBX_page_branch: + branch = true; + if (!nested) { + pagetype_caption = "branch"; + sdb->pages.branch += 1; + } else { + pagetype_caption = "nested-branch"; + sdb->pages.nested_branch += 1; + } + break; + case MDBX_page_dupfixed_leaf: + if (!nested) + chk_object_issue(scope, "page", pgno, "unexpected", + "type %u, subDb %s flags 0x%x, deep %i", + (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, + deep); + /* fall through */ + __fallthrough; + case MDBX_page_leaf: + if (!nested) { + pagetype_caption = "leaf"; + sdb->pages.leaf += 1; + if (height != sdb_info->internal->md_depth) + chk_object_issue(scope, "page", pgno, "wrong tree height", + "actual %i != %i subDb %s", height, + sdb_info->internal->md_depth, + chk_v2a(chk, &sdb->name)); + } else { + pagetype_caption = + (pagetype == MDBX_page_leaf) ? "nested-leaf" : "nested-leaf-dupfixed"; + sdb->pages.nested_leaf += 1; + if (chk->last_nested != nested) { + histogram_acc(height, &sdb->histogram.nested_tree); + chk->last_nested = nested; + } + if (height != nested->md_depth) + chk_object_issue(scope, "page", pgno, "wrong nested-tree height", + "actual %i != %i dupsort-node %s", height, + nested->md_depth, chk_v2a(chk, &sdb->name)); + } + break; + case MDBX_subpage_dupfixed_leaf: + case MDBX_subpage_leaf: + pagetype_caption = (pagetype == MDBX_subpage_leaf) ? "subleaf-dupsort" + : "subleaf-dupfixed"; + sdb->pages.nested_subleaf += 1; + if ((sdb->flags & MDBX_DUPSORT) == 0 || nested) + chk_object_issue(scope, "page", pgno, "unexpected", + "type %u, subDb %s flags 0x%x, deep %i", + (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, + deep); + break; + } + + if (npages) { + if (sdb->cookie) { + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra); + if (npages == 1) + chk_print(line, "%s-page %" PRIuSIZE, pagetype_caption, pgno); + else + chk_print(line, "%s-span %" PRIuSIZE "[%u]", pagetype_caption, pgno, + npages); + chk_line_end( + chk_print(line, + " of %s: header %" PRIiPTR ", %s %" PRIiPTR + ", payload %" PRIiPTR ", unused %" PRIiPTR ", deep %i", + chk_v2a(chk, &sdb->name), header_bytes, + (pagetype == MDBX_page_branch) ? "keys" : "entries", + nentries, payload_bytes, unused_bytes, deep)); + } + + bool already_used = false; + for (unsigned n = 0; n < npages; ++n) { + const size_t spanpgno = pgno + n; + if (spanpgno >= usr->result.alloc_pages) { + chk_object_issue(scope, "page", spanpgno, "wrong page-no", + "%s-page: %" PRIuSIZE " > %" PRIuSIZE ", deep %i", + pagetype_caption, spanpgno, usr->result.alloc_pages, + deep); + sdb->pages.all += 1; + } else if (chk->pagemap[spanpgno]) { + const MDBX_chk_subdb_t *const rival = + chk->subdb[chk->pagemap[spanpgno] - 1]; + chk_object_issue(scope, "page", spanpgno, + (branch && rival == sdb) ? "loop" : "already used", + "%s-page: by %s, deep %i", pagetype_caption, + chk_v2a(chk, &rival->name), deep); + already_used = true; + } else { + chk->pagemap[spanpgno] = (int16_t)sdb->id + 1; + sdb->pages.all += 1; + } + } + + if (already_used) + return branch ? MDBX_RESULT_TRUE /* avoid infinite loop/recursion */ + : MDBX_SUCCESS; + } + + if (MDBX_IS_ERROR(page_err)) { + chk_object_issue(scope, "page", pgno, "invalid/corrupted", "%s-page", + pagetype_caption); + } else { + if (unused_bytes > page_size) + chk_object_issue(scope, "page", pgno, "illegal unused-bytes", + "%s-page: %u < %" PRIuSIZE " < %u", pagetype_caption, 0, + unused_bytes, env->me_psize); + + if (header_bytes < (int)sizeof(long) || + (size_t)header_bytes >= env->me_psize - sizeof(long)) { + chk_object_issue(scope, "page", pgno, "illegal header-length", + "%s-page: %" PRIuSIZE " < %" PRIuSIZE " < %" PRIuSIZE, + pagetype_caption, sizeof(long), header_bytes, + env->me_psize - sizeof(long)); + } + if (payload_bytes < 1) { + if (nentries > 1) { + chk_object_issue(scope, "page", pgno, "zero size-of-entry", + "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE + " entries", + pagetype_caption, payload_bytes, nentries); + } else { + chk_object_issue(scope, "page", pgno, "empty", + "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE + " entries, deep %i", + pagetype_caption, payload_bytes, nentries, deep); + sdb->pages.empty += 1; + } + } + + if (npages) { + if (page_bytes != page_size) { + chk_object_issue(scope, "page", pgno, "misused", + "%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR + "h + %" PRIuPTR "p + %" PRIuPTR "u), deep %i", + pagetype_caption, page_size, page_bytes, header_bytes, + payload_bytes, unused_bytes, deep); + if (page_size > page_bytes) + sdb->lost_bytes += page_size - page_bytes; + } else { + sdb->payload_bytes += payload_bytes + header_bytes; + usr->result.total_payload_bytes += payload_bytes + header_bytes; + } + } + } + return chk_check_break(scope); +} + +__cold static int chk_tree(MDBX_chk_scope_t *const scope) { + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + MDBX_env *const env = usr->env; + MDBX_txn *const txn = usr->txn; + +#if defined(_WIN32) || defined(_WIN64) + SetLastError(ERROR_SUCCESS); +#else + errno = 0; +#endif /* Windows */ + chk->pagemap = osal_calloc(usr->result.alloc_pages, sizeof(*chk->pagemap)); + if (!chk->pagemap) { + int err = osal_get_errno(); + return chk_error_rc(scope, err ? err : MDBX_ENOMEM, "calloc"); + } + + if (scope->verbosity > MDBX_chk_info) + chk_scope_push(scope, 0, "Walking pages..."); + /* always skip key ordering checking + * to avoid MDBX_CORRUPTED in case custom comparators were used */ + usr->result.processed_pages = NUM_METAS; + int err = mdbx_env_pgwalk(txn, chk_pgvisitor, scope, true); + if (MDBX_IS_ERROR(err) && err != MDBX_EINTR) + chk_error_rc(scope, err, "mdbx_env_pgwalk"); + + for (size_t n = NUM_METAS; n < usr->result.alloc_pages; ++n) + if (!chk->pagemap[n]) + usr->result.unused_pages += 1; + + MDBX_chk_subdb_t total; + memset(&total, 0, sizeof(total)); + total.pages.all = NUM_METAS; + for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb) && chk->subdb[i]; ++i) { + MDBX_chk_subdb_t *const sdb = chk->subdb[i]; + total.payload_bytes += sdb->payload_bytes; + total.lost_bytes += sdb->lost_bytes; + total.pages.all += sdb->pages.all; + total.pages.empty += sdb->pages.empty; + total.pages.other += sdb->pages.other; + total.pages.branch += sdb->pages.branch; + total.pages.leaf += sdb->pages.leaf; + total.pages.nested_branch += sdb->pages.nested_branch; + total.pages.nested_leaf += sdb->pages.nested_leaf; + total.pages.nested_subleaf += sdb->pages.nested_subleaf; + } + assert(total.pages.all == usr->result.processed_pages); + + const size_t total_page_bytes = pgno2bytes(env, total.pages.all); + if (usr->scope->subtotal_issues || usr->scope->verbosity >= MDBX_chk_verbose) + chk_line_end(chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution), + "walked %zu pages, left/unused %zu" + ", %" PRIuSIZE " problem(s)", + usr->result.processed_pages, + usr->result.unused_pages, + usr->scope->subtotal_issues)); + + err = chk_scope_restore(scope, err); + if (scope->verbosity > MDBX_chk_info) { + for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb) && chk->subdb[i]; ++i) { + MDBX_chk_subdb_t *const sdb = chk->subdb[i]; + MDBX_chk_scope_t *inner = + chk_scope_push(scope, 0, "tree %s:", chk_v2a(chk, &sdb->name)); + if (sdb->pages.all == 0) + chk_line_end( + chk_print(chk_line_begin(inner, MDBX_chk_resolution), "empty")); + else { + MDBX_chk_line_t *line = chk_line_begin(inner, MDBX_chk_info); + if (line) { + line = chk_print(line, "page usage: subtotal %" PRIuSIZE, + sdb->pages.all); + const size_t branch_pages = + sdb->pages.branch + sdb->pages.nested_branch; + const size_t leaf_pages = sdb->pages.leaf + sdb->pages.nested_leaf + + sdb->pages.nested_subleaf; + if (sdb->pages.other) + line = chk_print(line, ", other %" PRIuSIZE, sdb->pages.other); + if (sdb->pages.other == 0 || + (branch_pages | leaf_pages | sdb->histogram.large_pages.count) != + 0) { + line = chk_print(line, ", branch %" PRIuSIZE ", leaf %" PRIuSIZE, + branch_pages, leaf_pages); + if (sdb->histogram.large_pages.count || + (sdb->flags & MDBX_DUPSORT) == 0) { + line = chk_print(line, ", large %" PRIuSIZE, + sdb->histogram.large_pages.count); + if (sdb->histogram.large_pages.amount | + sdb->histogram.large_pages.count) + line = histogram_print(inner, line, &sdb->histogram.large_pages, + " amount", "single", true); + } + } + line = histogram_dist(chk_line_feed(line), &sdb->histogram.deep, + "tree deep density", "1", false); + if (sdb != &chk->subdb_gc && sdb->histogram.nested_tree.count) { + line = chk_print(chk_line_feed(line), "nested tree(s) %" PRIuSIZE, + sdb->histogram.nested_tree.count); + line = histogram_dist(line, &sdb->histogram.nested_tree, " density", + "1", false); + line = chk_print(chk_line_feed(line), + "nested tree(s) pages %" PRIuSIZE + ": branch %" PRIuSIZE ", leaf %" PRIuSIZE + ", subleaf %" PRIuSIZE, + sdb->pages.nested_branch + sdb->pages.nested_leaf, + sdb->pages.nested_branch, sdb->pages.nested_leaf, + sdb->pages.nested_subleaf); + } + + const size_t bytes = pgno2bytes(env, sdb->pages.all); + line = chk_print( + chk_line_feed(line), + "page filling: subtotal %" PRIuSIZE + " bytes (%.1f%%), payload %" PRIuSIZE + " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)", + bytes, bytes * 100.0 / total_page_bytes, sdb->payload_bytes, + sdb->payload_bytes * 100.0 / bytes, bytes - sdb->payload_bytes, + (bytes - sdb->payload_bytes) * 100.0 / bytes); + if (sdb->pages.empty) + line = chk_print(line, ", %" PRIuSIZE " empty pages", + sdb->pages.empty); + if (sdb->lost_bytes) + line = + chk_print(line, ", %" PRIuSIZE " bytes lost", sdb->lost_bytes); + chk_line_end(line); + } + } + chk_scope_restore(scope, 0); + } + } + + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution); + line = chk_print(line, + "summary: total %" PRIuSIZE " bytes, payload %" PRIuSIZE + " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)," + " average fill %.1f%%", + total_page_bytes, usr->result.total_payload_bytes, + usr->result.total_payload_bytes * 100.0 / total_page_bytes, + total_page_bytes - usr->result.total_payload_bytes, + (total_page_bytes - usr->result.total_payload_bytes) * + 100.0 / total_page_bytes, + usr->result.total_payload_bytes * 100.0 / total_page_bytes); + if (total.pages.empty) + line = chk_print(line, ", %" PRIuSIZE " empty pages", total.pages.empty); + if (total.lost_bytes) + line = chk_print(line, ", %" PRIuSIZE " bytes lost", total.lost_bytes); + chk_line_end(line); + return err; +} + +typedef int(chk_kv_visitor)(MDBX_chk_scope_t *const scope, + MDBX_chk_subdb_t *sdb, const size_t record_number, + const MDBX_val *key, const MDBX_val *data); + +__cold static int chk_handle_kv(MDBX_chk_scope_t *const scope, + MDBX_chk_subdb_t *sdb, + const size_t record_number, const MDBX_val *key, + const MDBX_val *data) { + MDBX_chk_internal_t *const chk = scope->internal; + int err = MDBX_SUCCESS; + assert(sdb->cookie); + if (chk->cb->subdb_handle_kv) + err = chk->cb->subdb_handle_kv(chk->usr, sdb, record_number, key, data); + return err ? err : chk_check_break(scope); +} + +__cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, + MDBX_chk_subdb_t *sdb, chk_kv_visitor *handler) { + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + MDBX_env *const env = usr->env; + MDBX_txn *const txn = usr->txn; + MDBX_cursor *cursor = nullptr; + size_t record_count = 0, dups = 0, sub_databases = 0; + int err; + + if ((MDBX_TXN_FINISHED | MDBX_TXN_ERROR) & txn->mt_flags) { + chk_line_end( + chk_flush(chk_print(chk_line_begin(scope, MDBX_chk_error), + "abort processing %s due to a previous error", + chk_v2a(chk, &sdb->name)))); + err = MDBX_BAD_TXN; + goto bailout; + } + + if (0 > (int)dbi) { + err = dbi_open( + txn, &sdb->name, MDBX_DB_ACCEDE, &dbi, + (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr, + (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr); + if (unlikely(err)) { + chk_error_rc(scope, err, "mdbx_dbi_open"); + goto bailout; + } + } + + const MDBX_db *const db = txn->mt_dbs + dbi; + if (handler) { + const char *key_mode = nullptr; + switch (sdb->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) { + case 0: + key_mode = "usual"; + break; + case MDBX_REVERSEKEY: + key_mode = "reserve"; + break; + case MDBX_INTEGERKEY: + key_mode = "ordinal"; + break; + case MDBX_REVERSEKEY | MDBX_INTEGERKEY: + key_mode = "msgpack"; + break; + default: + key_mode = "inconsistent"; + chk_scope_issue(scope, "wrong key-mode (0x%x)", + sdb->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)); + } + + const char *value_mode = nullptr; + switch (sdb->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | + MDBX_INTEGERDUP)) { + case 0: + value_mode = "single"; + break; + case MDBX_DUPSORT: + value_mode = "multi"; + break; + case MDBX_DUPSORT | MDBX_REVERSEDUP: + value_mode = "multi-reverse"; + break; + case MDBX_DUPSORT | MDBX_DUPFIXED: + value_mode = "multi-samelength"; + break; + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP: + value_mode = "multi-reverse-samelength"; + break; + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: + value_mode = "multi-ordinal"; + break; + case MDBX_DUPSORT | MDBX_INTEGERDUP | MDBX_REVERSEDUP: + value_mode = "multi-msgpack"; + break; + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: + value_mode = "reserved"; + break; + default: + value_mode = "inconsistent"; + chk_scope_issue(scope, "wrong value-mode (0x%x)", + sdb->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | + MDBX_DUPFIXED | MDBX_INTEGERDUP)); + } + + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info); + line = chk_print(line, "key-value kind: %s-key => %s-value", key_mode, + value_mode); + line = chk_print(line, ", flags:"); + if (!sdb->flags) + line = chk_print(line, " none"); + else { + const uint8_t f[] = {MDBX_DUPSORT, + MDBX_INTEGERKEY, + MDBX_REVERSEKEY, + MDBX_DUPFIXED, + MDBX_REVERSEDUP, + MDBX_INTEGERDUP, + 0}; + const char *const t[] = {"dupsort", "integerkey", "reversekey", + "dupfixed", "reversedup", "integerdup"}; + for (size_t i = 0; f[i]; i++) + if (sdb->flags & f[i]) + line = chk_print(line, " %s", t[i]); + } + chk_line_end(chk_print(line, " (0x%02X)", sdb->flags)); + + line = chk_print(chk_line_begin(scope, MDBX_chk_verbose), + "entries %" PRIu64 ", sequence %" PRIu64, db->md_entries, + db->md_seq); + if (db->md_mod_txnid) + line = chk_print(line, ", last modification txn#%" PRIaTXN, + db->md_mod_txnid); + if (db->md_root != P_INVALID) + line = chk_print(line, ", root #%" PRIaPGNO, db->md_root); + chk_line_end(line); + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_verbose), + "b-tree depth %u, pages: branch %" PRIaPGNO + ", leaf %" PRIaPGNO ", large %" PRIaPGNO, + db->md_depth, db->md_branch_pages, db->md_leaf_pages, + db->md_overflow_pages)); + + if ((chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) { + const size_t branch_pages = sdb->pages.branch + sdb->pages.nested_branch; + const size_t leaf_pages = sdb->pages.leaf + sdb->pages.nested_leaf; + const size_t subtotal_pages = + db->md_branch_pages + db->md_leaf_pages + db->md_overflow_pages; + if (subtotal_pages != sdb->pages.all) + chk_scope_issue( + scope, "%s pages mismatch (%" PRIuSIZE " != walked %" PRIuSIZE ")", + "subtotal", subtotal_pages, sdb->pages.all); + if (db->md_branch_pages != branch_pages) + chk_scope_issue( + scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", + "branch", db->md_branch_pages, branch_pages); + if (db->md_leaf_pages != leaf_pages) + chk_scope_issue( + scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", + "all-leaf", db->md_leaf_pages, leaf_pages); + if (db->md_overflow_pages != sdb->histogram.large_pages.amount) + chk_scope_issue( + scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", + "large/overlow", db->md_overflow_pages, + sdb->histogram.large_pages.amount); + } + } + + err = mdbx_cursor_open(txn, dbi, &cursor); + if (unlikely(err)) { + chk_error_rc(scope, err, "mdbx_cursor_open"); + goto bailout; + } + if (chk->flags & MDBX_CHK_IGNORE_ORDER) { + cursor->mc_checking |= CC_SKIPORD | CC_PAGECHECK; + if (cursor->mc_xcursor) + cursor->mc_xcursor->mx_cursor.mc_checking |= CC_SKIPORD | CC_PAGECHECK; + } + + const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, sdb->flags); + MDBX_val prev_key = {nullptr, 0}, prev_data = {nullptr, 0}; + MDBX_val key, data; + err = mdbx_cursor_get(cursor, &key, &data, MDBX_FIRST); + while (err == MDBX_SUCCESS) { + err = chk_check_break(scope); + if (unlikely(err)) + goto bailout; + + bool bad_key = false; + if (key.iov_len > maxkeysize) { + chk_object_issue(scope, "entry", record_count, + "key length exceeds max-key-size", + "%" PRIuPTR " > %" PRIuPTR, key.iov_len, maxkeysize); + bad_key = true; + } else if ((sdb->flags & MDBX_INTEGERKEY) && key.iov_len != 8 && + key.iov_len != 4) { + chk_object_issue(scope, "entry", record_count, "wrong key length", + "%" PRIuPTR " != 4or8", key.iov_len); + bad_key = true; + } + + bool bad_data = false; + if ((sdb->flags & MDBX_INTEGERDUP) && data.iov_len != 8 && + data.iov_len != 4) { + chk_object_issue(scope, "entry", record_count, "wrong data length", + "%" PRIuPTR " != 4or8", data.iov_len); + bad_data = true; + } + + if (prev_key.iov_base) { + if (prev_data.iov_base && !bad_data && (sdb->flags & MDBX_DUPFIXED) && + prev_data.iov_len != data.iov_len) { + chk_object_issue(scope, "entry", record_count, "different data length", + "%" PRIuPTR " != %" PRIuPTR, prev_data.iov_len, + data.iov_len); + bad_data = true; + } + + if (!bad_key) { + int cmp = mdbx_cmp(txn, dbi, &key, &prev_key); + if (cmp == 0) { + ++dups; + if ((sdb->flags & MDBX_DUPSORT) == 0) { + chk_object_issue(scope, "entry", record_count, "duplicated entries", + nullptr); + if (prev_data.iov_base && data.iov_len == prev_data.iov_len && + memcmp(data.iov_base, prev_data.iov_base, data.iov_len) == 0) + chk_object_issue(scope, "entry", record_count, + "complete duplicate", nullptr); + } else if (!bad_data && prev_data.iov_base) { + cmp = mdbx_dcmp(txn, dbi, &data, &prev_data); + if (cmp == 0) + chk_object_issue(scope, "entry", record_count, + "complete duplicate", nullptr); + else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER)) + chk_object_issue(scope, "entry", record_count, + "wrong order of multi-values", nullptr); + } + } else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER)) + chk_object_issue(scope, "entry", record_count, + "wrong order of entries", nullptr); + } + } + + if (!bad_key) { + if (!prev_key.iov_base && (sdb->flags & MDBX_INTEGERKEY)) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), + "fixed key-size %" PRIuSIZE, key.iov_len)); + prev_key = key; + } + if (!bad_data) { + if (!prev_data.iov_base && + (sdb->flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED))) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), + "fixed data-size %" PRIuSIZE, data.iov_len)); + prev_data = data; + } + + record_count++; + histogram_acc(key.iov_len, &sdb->histogram.key_len); + histogram_acc(data.iov_len, &sdb->histogram.val_len); + + const MDBX_node *const node = + page_node(cursor->mc_pg[cursor->mc_top], cursor->mc_ki[cursor->mc_top]); + if (node_flags(node) == F_SUBDATA) { + if (dbi != MAIN_DBI || (sdb->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | + MDBX_REVERSEDUP | MDBX_INTEGERDUP))) + chk_object_issue(scope, "entry", record_count, + "unexpected sub-database", "node-flags 0x%x", + node_flags(node)); + else if (data.iov_len != sizeof(MDBX_db)) + chk_object_issue(scope, "entry", record_count, + "wrong sub-database node size", + "node-size %" PRIuSIZE " != %" PRIuSIZE, data.iov_len, + sizeof(MDBX_db)); + else if (scope->stage == MDBX_chk_traversal_maindb) + /* подсчитываем subDB при первом проходе */ + sub_databases += 1; + else { + /* обработка subDB при втором проходе */ + MDBX_db aligned_db; + memcpy(&aligned_db, data.iov_base, sizeof(aligned_db)); + MDBX_walk_sdb_t sdb_info = {key, nullptr, nullptr}; + sdb_info.internal = &aligned_db; + MDBX_chk_subdb_t *subdb; + err = chk_get_sdb(scope, &sdb_info, &subdb); + if (unlikely(err)) + goto bailout; + if (subdb->cookie) { + err = chk_scope_begin(chk, 0, MDBX_chk_traversal_subdbs, subdb, + &usr->result.problems_kv, + "Processing subDB %s...", + chk_v2a(chk, &subdb->name)); + if (likely(!err)) { + err = chk_db(usr->scope, (MDBX_dbi)-1, subdb, chk_handle_kv); + if (err != MDBX_EINTR && err != MDBX_RESULT_TRUE) + usr->result.subdb_processed += 1; + } + err = chk_scope_restore(scope, err); + if (unlikely(err)) + goto bailout; + } else + chk_line_end(chk_flush( + chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s...", chk_v2a(chk, &subdb->name)))); + } + } else if (handler) { + err = handler(scope, sdb, record_count, &key, &data); + if (unlikely(err)) + goto bailout; + } + + err = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT); + } + + err = (err != MDBX_NOTFOUND) ? chk_error_rc(scope, err, "mdbx_cursor_get") + : MDBX_SUCCESS; + if (err == MDBX_SUCCESS && record_count != db->md_entries) + chk_scope_issue(scope, + "different number of entries %" PRIuSIZE " != %" PRIu64, + record_count, db->md_entries); +bailout: + if (cursor) { + if (handler) { + if (sdb->histogram.key_len.count) { + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info); + line = histogram_dist(line, &sdb->histogram.key_len, + "key length density", "0/1", false); + chk_line_feed(line); + line = histogram_dist(line, &sdb->histogram.val_len, + "value length density", "0/1", false); + chk_line_end(line); + } + if (scope->stage == MDBX_chk_traversal_maindb) + usr->result.subdb_total = sub_databases; + if (chk->cb->subdb_conclude) + err = chk->cb->subdb_conclude(usr, sdb, cursor, err); + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution); + line = chk_print(line, "summary: %" PRIuSIZE " records,", record_count); + if (dups || (sdb->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | + MDBX_REVERSEDUP | MDBX_INTEGERDUP))) + line = chk_print(line, " %" PRIuSIZE " dups,", dups); + if (sub_databases || dbi == MAIN_DBI) + line = chk_print(line, " %" PRIuSIZE " sub-databases,", sub_databases); + line = chk_print(line, + " %" PRIuSIZE " key's bytes," + " %" PRIuSIZE " data's bytes," + " %" PRIuSIZE " problem(s)", + sdb->histogram.key_len.amount, + sdb->histogram.val_len.amount, scope->subtotal_issues); + chk_line_end(chk_flush(line)); + } + + mdbx_cursor_close(cursor); + if (dbi >= CORE_DBS && !txn->mt_cursors[dbi] && + txn->mt_dbistate[dbi] == (DBI_FRESH | DBI_VALID | DBI_USRVALID)) + mdbx_dbi_close(env, dbi); + } + return err; +} + +__cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, + MDBX_chk_subdb_t *sdb, + const size_t record_number, const MDBX_val *key, + const MDBX_val *data) { + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + assert(sdb == &chk->subdb_gc); + (void)sdb; + const char *bad = ""; + pgno_t *iptr = data->iov_base; + + if (key->iov_len != sizeof(txnid_t)) + chk_object_issue(scope, "entry", record_number, "wrong txn-id size", + "key-size %" PRIuSIZE, key->iov_len); + else { + txnid_t txnid; + memcpy(&txnid, key->iov_base, sizeof(txnid)); + if (txnid < 1 || txnid > usr->txn->mt_txnid) + chk_object_issue(scope, "entry", record_number, "wrong txn-id", + "%" PRIaTXN, txnid); + else { + if (data->iov_len < sizeof(pgno_t) || data->iov_len % sizeof(pgno_t)) + chk_object_issue(scope, "entry", txnid, "wrong idl size", "%" PRIuPTR, + data->iov_len); + size_t number = (data->iov_len >= sizeof(pgno_t)) ? *iptr++ : 0; + if (number < 1 || number > MDBX_PGL_LIMIT) + chk_object_issue(scope, "entry", txnid, "wrong idl length", "%" PRIuPTR, + number); + else if ((number + 1) * sizeof(pgno_t) > data->iov_len) { + chk_object_issue(scope, "entry", txnid, "trimmed idl", + "%" PRIuSIZE " > %" PRIuSIZE " (corruption)", + (number + 1) * sizeof(pgno_t), data->iov_len); + number = data->iov_len / sizeof(pgno_t) - 1; + } else if (data->iov_len - (number + 1) * sizeof(pgno_t) >= + /* LY: allow gap up to one page. it is ok + * and better than shink-and-retry inside update_gc() */ + usr->env->me_psize) + chk_object_issue(scope, "entry", txnid, "extra idl space", + "%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)", + (number + 1) * sizeof(pgno_t), data->iov_len); + + usr->result.gc_pages += number; + if (chk->envinfo.mi_latter_reader_txnid > txnid) + usr->result.reclaimable_pages += number; + + size_t prev = MDBX_PNL_ASCENDING ? NUM_METAS - 1 : usr->txn->mt_next_pgno; + size_t span = 1; + for (size_t i = 0; i < number; ++i) { + const size_t pgno = iptr[i]; + if (pgno < NUM_METAS) + chk_object_issue(scope, "entry", txnid, "wrong idl entry", + "pgno %" PRIuSIZE " < meta-pages %u", pgno, + NUM_METAS); + else if (pgno >= usr->result.backed_pages) + chk_object_issue(scope, "entry", txnid, "wrong idl entry", + "pgno %" PRIuSIZE " > backed-pages %" PRIuSIZE, pgno, + usr->result.backed_pages); + else if (pgno >= usr->result.alloc_pages) + chk_object_issue(scope, "entry", txnid, "wrong idl entry", + "pgno %" PRIuSIZE " > alloc-pages %" PRIuSIZE, pgno, + usr->result.alloc_pages - 1); + else { + if (MDBX_PNL_DISORDERED(prev, pgno)) { + bad = " [bad sequence]"; + chk_object_issue( + scope, "entry", txnid, "bad sequence", + "%" PRIuSIZE " %c [%" PRIuSIZE "].%" PRIuSIZE, prev, + (prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? '>' : '<'), i, + pgno); + } + if (chk->pagemap) { + const intptr_t id = chk->pagemap[pgno]; + if (id == 0) + chk->pagemap[pgno] = -1 /* mark the pgno listed in GC */; + else if (id > 0) { + assert(id - 1 <= (intptr_t)ARRAY_LENGTH(chk->subdb)); + chk_object_issue(scope, "page", pgno, "already used", "by %s", + chk_v2a(chk, &chk->subdb[id - 1]->name)); + } else + chk_object_issue(scope, "page", pgno, "already listed in GC", + nullptr); + } + } + prev = pgno; + while (i + span < number && + iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) + : pgno_sub(pgno, span))) + ++span; + } + if (sdb->cookie) { + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_details), + "transaction %" PRIaTXN ", %" PRIuSIZE + " pages, maxspan %" PRIuSIZE "%s", + txnid, number, span, bad)); + for (size_t i = 0; i < number; i += span) { + const size_t pgno = iptr[i]; + for (span = 1; + i + span < number && + iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) + : pgno_sub(pgno, span)); + ++span) + ; + histogram_acc(span, &sdb->histogram.nested_tree); + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra); + if (line) { + if (span > 1) + line = + chk_print(line, "%9" PRIuSIZE "[%" PRIuSIZE "]", pgno, span); + else + line = chk_print(line, "%9" PRIuSIZE, pgno); + chk_line_end(line); + int err = chk_check_break(scope); + if (err) + return err; + } + } + } + } + } + return chk_check_break(scope); +} + +__cold static int env_chk(MDBX_chk_scope_t *const scope) { + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + MDBX_env *const env = usr->env; + MDBX_txn *const txn = usr->txn; + int err = + env_info(env, txn, &chk->envinfo, sizeof(chk->envinfo), &chk->troika); + if (unlikely(err)) + return chk_error_rc(scope, err, "env_info"); + + MDBX_chk_line_t *line = + chk_puts(chk_line_begin(scope, MDBX_chk_info), "current boot-id "); + if (chk->envinfo.mi_bootid.current.x | chk->envinfo.mi_bootid.current.y) + line = chk_print(line, "%016" PRIx64 "-%016" PRIx64, + chk->envinfo.mi_bootid.current.x, + chk->envinfo.mi_bootid.current.y); + else + line = chk_puts(line, "unavailable"); + chk_line_end(line); + + err = osal_filesize(env->me_lazy_fd, &env->me_dxb_mmap.filesize); + if (unlikely(err)) + return chk_error_rc(scope, err, "osal_filesize"); + + //-------------------------------------------------------------------------- + + err = chk_scope_begin(chk, 1, MDBX_chk_meta, nullptr, + &usr->result.problems_meta, "Peek the meta-pages..."); + if (likely(!err)) { + MDBX_chk_scope_t *const inner = usr->scope; + const uint64_t dxbfile_pages = + env->me_dxb_mmap.filesize >> env->me_psize2log; + usr->result.alloc_pages = txn->mt_next_pgno; + usr->result.backed_pages = bytes2pgno(env, env->me_dxb_mmap.current); + if (unlikely(usr->result.backed_pages > dxbfile_pages)) + chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, + usr->result.backed_pages, dxbfile_pages); + if (unlikely(dxbfile_pages < NUM_METAS)) + chk_scope_issue(inner, "file-pages %" PRIu64 " < %u", dxbfile_pages, + NUM_METAS); + if (unlikely(usr->result.backed_pages < NUM_METAS)) + chk_scope_issue(inner, "backed-pages %zu < %u", usr->result.backed_pages, + NUM_METAS); + if (unlikely(usr->result.backed_pages < NUM_METAS || + dxbfile_pages < NUM_METAS)) + return MDBX_CORRUPTED; + if (unlikely(usr->result.backed_pages > (size_t)MAX_PAGENO + 1)) { + chk_scope_issue(inner, "backed-pages %zu > max-pages %zu", + usr->result.backed_pages, (size_t)MAX_PAGENO + 1); + usr->result.backed_pages = MAX_PAGENO + 1; + } + + if ((env->me_flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) { + if (unlikely(usr->result.backed_pages > dxbfile_pages)) { + chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, + usr->result.backed_pages, dxbfile_pages); + usr->result.backed_pages = (size_t)dxbfile_pages; + } + if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) { + chk_scope_issue(scope, "alloc-pages %zu > backed-pages %zu", + usr->result.alloc_pages, usr->result.backed_pages); + usr->result.alloc_pages = usr->result.backed_pages; + } + } else { + /* DB may be shrunk by writer down to the allocated (but unused) pages. */ + if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) { + chk_scope_issue(inner, "alloc-pages %zu > backed-pages %zu", + usr->result.alloc_pages, usr->result.backed_pages); + usr->result.alloc_pages = usr->result.backed_pages; + } + if (unlikely(usr->result.alloc_pages > dxbfile_pages)) { + chk_scope_issue(inner, "alloc-pages %zu > file-pages %" PRIu64, + usr->result.alloc_pages, dxbfile_pages); + usr->result.alloc_pages = (size_t)dxbfile_pages; + } + if (unlikely(usr->result.backed_pages > dxbfile_pages)) + usr->result.backed_pages = (size_t)dxbfile_pages; + } + + line = chk_line_feed(chk_print( + chk_line_begin(inner, MDBX_chk_info), + "pagesize %u (%u system), max keysize %u..%u" + ", max readers %u", + env->me_psize, env->me_os_psize, + mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT), + mdbx_env_get_maxkeysize_ex(env, MDBX_DB_DEFAULTS), env->me_maxreaders)); + line = chk_line_feed( + chk_print_size(line, "mapsize ", env->me_dxb_mmap.current, nullptr)); + if (txn->mt_geo.lower == txn->mt_geo.upper) + line = chk_print_size( + line, "fixed datafile: ", chk->envinfo.mi_geo.current, nullptr); + else { + line = chk_print_size( + line, "dynamic datafile: ", chk->envinfo.mi_geo.lower, nullptr); + line = chk_print_size(line, " .. ", chk->envinfo.mi_geo.upper, ", "); + line = chk_print_size(line, "+", chk->envinfo.mi_geo.grow, ", "); + + line = chk_line_feed( + chk_print_size(line, "-", chk->envinfo.mi_geo.shrink, nullptr)); + line = chk_print_size( + line, "current datafile: ", chk->envinfo.mi_geo.current, nullptr); + } + tASSERT(txn, txn->mt_geo.now == chk->envinfo.mi_geo.current / + chk->envinfo.mi_dxb_pagesize); + chk_line_end(chk_print(line, ", %u pages", txn->mt_geo.now)); +#if defined(_WIN32) || defined(_WIN64) || MDBX_DEBUG + if (txn->mt_geo.shrink_pv && txn->mt_geo.now != txn->mt_geo.upper && + scope->verbosity >= MDBX_chk_verbose) { + line = chk_line_begin(inner, MDBX_chk_notice); + chk_line_feed(chk_print( + line, " > WARNING: Due Windows system limitations a file couldn't")); + chk_line_feed(chk_print( + line, " > be truncated while the database is opened. So, the size")); + chk_line_feed(chk_print( + line, " > database file of may by large than the database itself,")); + chk_line_end(chk_print( + line, " > until it will be closed or reopened in read-write mode.")); + } +#endif /* Windows || Debug */ + chk_verbose_meta(inner, 0); + chk_verbose_meta(inner, 1); + chk_verbose_meta(inner, 2); + + if (env->me_stuck_meta >= 0) { + chk_line_end(chk_print(chk_line_begin(inner, MDBX_chk_processing), + "skip checking meta-pages since the %u" + " is selected for verification", + env->me_stuck_meta)); + line = chk_line_feed( + chk_print(chk_line_begin(inner, MDBX_chk_resolution), + "transactions: recent %" PRIu64 ", " + "selected for verification %" PRIu64 ", lag %" PRIi64, + chk->envinfo.mi_recent_txnid, + chk->envinfo.mi_meta_txnid[env->me_stuck_meta], + chk->envinfo.mi_recent_txnid - + chk->envinfo.mi_meta_txnid[env->me_stuck_meta])); + chk_line_end(line); + } else { + chk_line_end(chk_puts(chk_line_begin(inner, MDBX_chk_verbose), + "performs check for meta-pages clashes")); + const unsigned meta_clash_mask = meta_eq_mask(&chk->troika); + if (meta_clash_mask & 1) + chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 0, 1); + if (meta_clash_mask & 2) + chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 1, 2); + if (meta_clash_mask & 4) + chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 2, 0); + + const unsigned prefer_steady_metanum = chk->troika.prefer_steady; + const uint64_t prefer_steady_txnid = + chk->troika.txnid[prefer_steady_metanum]; + const unsigned recent_metanum = chk->troika.recent; + const uint64_t recent_txnid = chk->troika.txnid[recent_metanum]; + if (env->me_flags & MDBX_EXCLUSIVE) { + chk_line_end( + chk_puts(chk_line_begin(inner, MDBX_chk_verbose), + "performs full check recent-txn-id with meta-pages")); + if (prefer_steady_txnid != chk->envinfo.mi_recent_txnid) { + chk_scope_issue( + inner, + "steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64 + " != %" PRIi64 ")", + prefer_steady_metanum, prefer_steady_txnid, + chk->envinfo.mi_recent_txnid); + } + } else if (chk->write_locked) { + chk_line_end( + chk_puts(chk_line_begin(inner, MDBX_chk_verbose), + "performs lite check recent-txn-id with meta-pages (not a " + "monopolistic mode)")); + if (recent_txnid != chk->envinfo.mi_recent_txnid) { + chk_scope_issue(inner, + "weak meta-%d txn-id mismatch recent-txn-id (%" PRIi64 + " != %" PRIi64 ")", + recent_metanum, recent_txnid, + chk->envinfo.mi_recent_txnid); + } + } else { + chk_line_end(chk_puts( + chk_line_begin(inner, MDBX_chk_verbose), + "skip check recent-txn-id with meta-pages (monopolistic or " + "read-write mode only)")); + } + + chk_line_end(chk_print( + chk_line_begin(inner, MDBX_chk_resolution), + "transactions: recent %" PRIu64 ", latter reader %" PRIu64 + ", lag %" PRIi64, + chk->envinfo.mi_recent_txnid, chk->envinfo.mi_latter_reader_txnid, + chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid)); + } + } + err = chk_scope_restore(scope, err); + + //-------------------------------------------------------------------------- + + eASSERT(env, err == MDBX_SUCCESS); + if (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skipping %s traversal...", "b-tree")); + else { + err = chk_scope_begin( + chk, -1, MDBX_chk_traversal_tree, nullptr, &usr->result.tree_problems, + "Traversal %s by txn#%" PRIaTXN "...", "b-tree", txn->mt_txnid); + if (likely(!err)) + err = chk_tree(usr->scope); + if (usr->result.tree_problems && usr->result.gc_tree_problems == 0) + usr->result.gc_tree_problems = usr->result.tree_problems; + if (usr->result.tree_problems && usr->result.kv_tree_problems == 0) + usr->result.kv_tree_problems = usr->result.tree_problems; + chk_scope_restore(scope, err); + } + + if (usr->result.gc_tree_problems > 0) + chk_line_end(chk_print( + chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", + chk_v2a(chk, MDBX_CHK_GC), "b-tree", + usr->result.problems_gc = usr->result.gc_tree_problems)); + else { + err = chk_scope_begin(chk, -1, MDBX_chk_traversal_freedb, &chk->subdb_gc, + &usr->result.problems_gc, + "Traversal %s by txn#%" PRIaTXN "...", "GC/freeDB", + txn->mt_txnid); + if (likely(!err)) + err = chk_db(usr->scope, FREE_DBI, &chk->subdb_gc, chk_handle_gc); + line = chk_line_begin(scope, MDBX_chk_info); + if (line) { + histogram_print(scope, line, &chk->subdb_gc.histogram.nested_tree, + "span(s)", "single", false); + chk_line_end(line); + } + if (usr->result.problems_gc == 0 && + (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) { + const size_t used_pages = usr->result.alloc_pages - usr->result.gc_pages; + if (usr->result.processed_pages != used_pages) + chk_scope_issue(usr->scope, + "used pages mismatch (%" PRIuSIZE + "(walked) != %" PRIuSIZE "(allocated - GC))", + usr->result.processed_pages, used_pages); + if (usr->result.unused_pages != usr->result.gc_pages) + chk_scope_issue(usr->scope, + "GC pages mismatch (%" PRIuSIZE + "(expected) != %" PRIuSIZE "(GC))", + usr->result.unused_pages, usr->result.gc_pages); + } + } + chk_scope_restore(scope, err); + + //-------------------------------------------------------------------------- + + err = chk_scope_begin(chk, 1, MDBX_chk_space, nullptr, nullptr, + "Page allocation:"); + const double percent_boundary_reciprocal = 100.0 / txn->mt_geo.upper; + const double percent_backed_reciprocal = 100.0 / usr->result.backed_pages; + const size_t detained = usr->result.gc_pages - usr->result.reclaimable_pages; + const size_t available2boundary = txn->mt_geo.upper - + usr->result.alloc_pages + + usr->result.reclaimable_pages; + const size_t available2backed = usr->result.backed_pages - + usr->result.alloc_pages + + usr->result.reclaimable_pages; + const size_t remained2boundary = txn->mt_geo.upper - usr->result.alloc_pages; + const size_t remained2backed = + usr->result.backed_pages - usr->result.alloc_pages; + + const size_t used = (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) + ? usr->result.alloc_pages - usr->result.gc_pages + : usr->result.processed_pages; + + line = chk_line_begin(usr->scope, MDBX_chk_info); + line = chk_print(line, + "backed by file: %" PRIuSIZE " pages (%.1f%%)" + ", %" PRIuSIZE " left to boundary (%.1f%%)", + usr->result.backed_pages, + usr->result.backed_pages * percent_boundary_reciprocal, + txn->mt_geo.upper - usr->result.backed_pages, + (txn->mt_geo.upper - usr->result.backed_pages) * + percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print( + line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", + "used", used, used * percent_backed_reciprocal, + used * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print( + line, + "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE + " to boundary (%.1f%% of boundary)", + "remained", remained2backed, remained2backed * percent_backed_reciprocal, + remained2boundary, remained2boundary * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print( + line, + "reclaimable: %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)" + ", GC %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)", + usr->result.reclaimable_pages, + usr->result.reclaimable_pages * percent_backed_reciprocal, + usr->result.reclaimable_pages * percent_boundary_reciprocal, + usr->result.gc_pages, usr->result.gc_pages * percent_backed_reciprocal, + usr->result.gc_pages * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print( + line, + "detained by reader(s): %" PRIuSIZE + " (%.1f%% of backed, %.1f%% of boundary)" + ", %u reader(s), lag %" PRIi64, + detained, detained * percent_backed_reciprocal, + detained * percent_boundary_reciprocal, chk->envinfo.mi_numreaders, + chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid); + line = chk_line_feed(line); + + line = chk_print( + line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", + "allocated", usr->result.alloc_pages, + usr->result.alloc_pages * percent_backed_reciprocal, + usr->result.alloc_pages * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print(line, + "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE + " to boundary (%.1f%% of boundary)", + "available", available2backed, + available2backed * percent_backed_reciprocal, + available2boundary, + available2boundary * percent_boundary_reciprocal); + chk_line_end(line); + + line = chk_line_begin(usr->scope, MDBX_chk_resolution); + line = chk_print(line, "%s %" PRIaPGNO " pages", + (txn->mt_geo.upper == txn->mt_geo.now) ? "total" : "upto", + txn->mt_geo.upper); + line = chk_print(line, ", backed %" PRIuSIZE " (%.1f%%)", + usr->result.backed_pages, + usr->result.backed_pages * percent_boundary_reciprocal); + line = chk_print(line, ", allocated %" PRIuSIZE " (%.1f%%)", + usr->result.alloc_pages, + usr->result.alloc_pages * percent_boundary_reciprocal); + line = + chk_print(line, ", available %" PRIuSIZE " (%.1f%%)", available2boundary, + available2boundary * percent_boundary_reciprocal); + chk_line_end(line); + chk_scope_restore(scope, err); + + //-------------------------------------------------------------------------- + + if (chk->flags & MDBX_CHK_SKIP_KV_TRAVERSAL) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skipping %s traversal...", "key-value")); + else if ((usr->result.problems_kv = usr->result.kv_tree_problems) > 0) + chk_line_end(chk_print( + chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", + chk_v2a(chk, MDBX_CHK_MAIN), "key-value", + usr->result.problems_kv = usr->result.kv_tree_problems)); + else { + err = + chk_scope_begin(chk, 0, MDBX_chk_traversal_maindb, &chk->subdb_main, + &usr->result.problems_kv, "Processing %s...", "MainDB"); + if (likely(!err)) + err = chk_db(usr->scope, MAIN_DBI, &chk->subdb_main, chk_handle_kv); + chk_scope_restore(scope, err); + + if (usr->result.problems_kv && usr->result.subdb_total) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s", "sub-database(s)")); + else if (usr->result.problems_kv == 0 && usr->result.subdb_total == 0) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "No %s", + "sub-database(s)")); + else if (usr->result.problems_kv == 0 && usr->result.subdb_total) { + err = chk_scope_begin(chk, 1, MDBX_chk_traversal_subdbs, nullptr, + &usr->result.problems_kv, + "Traversal %s by txn#%" PRIaTXN "...", + "sub-database(s)", txn->mt_txnid); + if (!err) + err = chk_db(usr->scope, MAIN_DBI, &chk->subdb_main, nullptr); + if (usr->scope->subtotal_issues) + chk_line_end( + chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution), + "processed %" PRIuSIZE " of %" PRIuSIZE " subDb(s)" + ", %" PRIuSIZE " problems(s)", + usr->result.subdb_processed, usr->result.subdb_total, + usr->scope->subtotal_issues)); + } + chk_scope_restore(scope, err); + } + + return chk_scope_end(chk, chk_scope_begin(chk, 0, MDBX_chk_conclude, nullptr, + nullptr, nullptr)); +} + +__cold int mdbx_env_chk_problem(MDBX_chk_context_t *ctx) { + if (likely(ctx && ctx->internal && ctx->internal->usr == ctx && + ctx->internal->problem_counter && ctx->scope)) { + *ctx->internal->problem_counter += 1; + ctx->scope->subtotal_issues += 1; + return MDBX_SUCCESS; + } + return MDBX_EINVAL; +} + +__cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks *cb, + MDBX_chk_context_t *ctx, + const enum MDBX_chk_flags_t flags, + enum MDBX_chk_severity verbosity, + unsigned timeout_seconds_16dot16) { + int err, rc = check_env(env, false); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + if (unlikely(!cb || !ctx || ctx->internal)) + return MDBX_EINVAL; + + MDBX_chk_internal_t *const chk = osal_calloc(1, sizeof(MDBX_chk_internal_t)); + if (unlikely(!chk)) + return MDBX_ENOMEM; + + chk->cb = cb; + chk->usr = ctx; + chk->usr->internal = chk; + chk->usr->env = env; + chk->flags = flags; + + chk->subdb_gc.id = -1; + chk->subdb_gc.name.iov_base = MDBX_CHK_GC; + chk->subdb[FREE_DBI] = &chk->subdb_gc; + + chk->subdb_main.id = -1; + chk->subdb_main.name.iov_base = MDBX_CHK_MAIN; + chk->subdb[MAIN_DBI] = &chk->subdb_main; + + chk->monotime_timeout = + timeout_seconds_16dot16 + ? osal_16dot16_to_monotime(timeout_seconds_16dot16) + osal_monotime() + : 0; + chk->usr->scope_nesting = 0; + chk->usr->result.subdbs = (const void *)&chk->subdb; + + MDBX_chk_scope_t *const top = chk->scope_stack; + top->verbosity = verbosity; + top->internal = chk; + + // init + rc = chk_scope_end( + chk, chk_scope_begin(chk, 0, MDBX_chk_init, nullptr, nullptr, nullptr)); + + // lock + if (likely(!rc)) + rc = chk_scope_begin( + chk, 0, MDBX_chk_lock, nullptr, nullptr, "Taking %slock...", + (env->me_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) ? "" : "read "); + if (likely(!rc) && (env->me_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0) { + rc = mdbx_txn_lock(env, false); + if (unlikely(rc)) + chk_error_rc(ctx->scope, rc, "mdbx_txn_lock"); + else + chk->write_locked = true; + } + if (likely(!rc)) { + rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &ctx->txn); + if (unlikely(rc)) + chk_error_rc(ctx->scope, rc, "mdbx_txn_begin"); + } + chk_scope_end(chk, rc); + + // doit + if (likely(!rc)) { + chk->subdb_gc.flags = ctx->txn->mt_dbs[FREE_DBI].md_flags; + chk->subdb_main.flags = ctx->txn->mt_dbs[MAIN_DBI].md_flags; + rc = env_chk(top); + } + + // unlock + if (ctx->txn || chk->write_locked) { + chk_scope_begin(chk, 0, MDBX_chk_unlock, nullptr, nullptr, nullptr); + if (ctx->txn) { + err = mdbx_txn_abort(ctx->txn); + if (err && !rc) + rc = err; + ctx->txn = nullptr; + } + if (chk->write_locked) + mdbx_txn_unlock(env); + rc = chk_scope_end(chk, rc); + } + + // finalize + err = chk_scope_begin(chk, 0, MDBX_chk_finalize, nullptr, nullptr, nullptr); + rc = chk_scope_end(chk, err ? err : rc); + chk_dispose(chk); + return rc; +} + /******************************************************************************/ /* *INDENT-OFF* */ /* clang-format off */ diff --git a/src/internals.h b/src/internals.h index 1664dcd7..c871b3df 100644 --- a/src/internals.h +++ b/src/internals.h @@ -703,7 +703,8 @@ typedef struct MDBX_page { #define PAGETYPE_WHOLE(p) ((uint8_t)(p)->mp_flags) -/* Drop legacy P_DIRTY flag for sub-pages for compatilibity */ +/* Drop legacy P_DIRTY flag for sub-pages for compatilibity, + * for assertions only. */ #define PAGETYPE_COMPAT(p) \ (unlikely(PAGETYPE_WHOLE(p) & P_SUBP) \ ? PAGETYPE_WHOLE(p) & ~(P_SUBP | P_LEGACY_DIRTY) \ @@ -1136,10 +1137,10 @@ typedef struct troika { #if MDBX_WORDBITS > 32 /* Workaround for false-positives from Valgrind */ uint32_t unused_pad; #endif -#define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7) -#define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64) -#define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128) -#define TROIKA_TAIL(troika) ((troika)->tail_and_flags & 3) +#define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7u) +#define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64u) +#define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128u) +#define TROIKA_TAIL(troika) ((troika)->tail_and_flags & 3u) txnid_t txnid[NUM_METAS]; } meta_troika_t; @@ -1787,3 +1788,33 @@ MDBX_MAYBE_UNUSED static void static_checks(void) { (size_t)(size), __LINE__); \ ASAN_UNPOISON_MEMORY_REGION(addr, size); \ } while (0) + +/******************************************************************************/ + +/** \brief Page types for traverse the b-tree. + * \see mdbx_env_pgwalk() \see MDBX_pgvisitor_func */ +enum MDBX_page_type_t { + MDBX_page_broken, + MDBX_page_large, + MDBX_page_branch, + MDBX_page_leaf, + MDBX_page_dupfixed_leaf, + MDBX_subpage_leaf, + MDBX_subpage_dupfixed_leaf, + MDBX_subpage_broken, +}; +typedef enum MDBX_page_type_t MDBX_page_type_t; + +typedef struct MDBX_walk_sdb { + MDBX_val name; + struct MDBX_db *internal, *nested; +} MDBX_walk_sdb_t; + +/** \brief Callback function for traverse the b-tree. \see mdbx_env_pgwalk() */ +typedef int +MDBX_pgvisitor_func(const size_t pgno, const unsigned number, void *const ctx, + const int deep, const MDBX_walk_sdb_t *subdb, + const size_t page_size, const MDBX_page_type_t page_type, + const MDBX_error_t err, const size_t nentries, + const size_t payload_bytes, const size_t header_bytes, + const size_t unused_bytes); diff --git a/src/mdbx_chk.c b/src/mdbx_chk.c index a8c97372..c590253d 100644 --- a/src/mdbx_chk.c +++ b/src/mdbx_chk.c @@ -25,19 +25,6 @@ #include -typedef struct flagbit { - int bit; - const char *name; -} flagbit; - -const flagbit dbflags[] = {{MDBX_DUPSORT, "dupsort"}, - {MDBX_INTEGERKEY, "integerkey"}, - {MDBX_REVERSEKEY, "reversekey"}, - {MDBX_DUPFIXED, "dupfixed"}, - {MDBX_REVERSEDUP, "reversedup"}, - {MDBX_INTEGERDUP, "integerdup"}, - {0, nullptr}}; - #if defined(_WIN32) || defined(_WIN64) #include "wingetopt.h" @@ -72,181 +59,171 @@ static void signal_handler(int sig) { #define EXIT_FAILURE_CHECK_MAJOR (EXIT_FAILURE + 1) #define EXIT_FAILURE_CHECK_MINOR EXIT_FAILURE -typedef struct { - MDBX_val name; - struct { - uint64_t branch, large_count, large_volume, leaf; - uint64_t subleaf_dupsort, leaf_dupfixed, subleaf_dupfixed; - uint64_t total, empty, other; - } pages; - uint64_t payload_bytes; - uint64_t lost_bytes; -} walk_dbi_t; - -struct { - short *pagemap; - uint64_t total_payload_bytes; - uint64_t pgcount; - walk_dbi_t - dbi[MDBX_MAX_DBI + CORE_DBS + /* account pseudo-entry for meta */ 1]; -} walk; - -#define dbi_free walk.dbi[FREE_DBI] -#define dbi_main walk.dbi[MAIN_DBI] -#define dbi_meta walk.dbi[CORE_DBS] - -int envflags = MDBX_RDONLY | MDBX_EXCLUSIVE | MDBX_VALIDATION; +enum MDBX_env_flags_t env_flags = + MDBX_RDONLY | MDBX_EXCLUSIVE | MDBX_VALIDATION; MDBX_env *env; MDBX_txn *txn; -MDBX_envinfo envinfo; -size_t userdb_count, skipped_subdb; -uint64_t total_unused_bytes, reclaimable_pages, gc_pages, alloc_pages, - unused_pages, backed_pages; -unsigned verbose; -bool ignore_wrong_order, quiet, dont_traversal; +unsigned verbose = 0; +bool quiet; MDBX_val only_subdb; int stuck_meta = -1; +MDBX_chk_context_t chk; +bool turn_meta = false; +bool force_turn_meta = false; +enum MDBX_chk_flags_t chk_flags = MDBX_CHK_DEFAULTS; +enum MDBX_chk_stage chk_stage = MDBX_chk_none; -struct problem { - struct problem *pr_next; - size_t count; - const char *caption; -}; +static MDBX_chk_line_t line_struct; +static size_t anchor_lineno; +static size_t line_count; +static FILE *line_output; -struct problem *problems_list; -unsigned total_problems, data_tree_problems, gc_tree_problems; - -static void MDBX_PRINTF_ARGS(1, 2) print(const char *msg, ...) { - if (!quiet) { - va_list args; - - fflush(stderr); - va_start(args, msg); - vfprintf(stdout, msg, args); - va_end(args); - } -} - -static MDBX_val printable_buf; -static void free_printable_buf(void) { osal_free(printable_buf.iov_base); } - -static const char *sdb_name(const MDBX_val *val) { - if (val == MDBX_PGWALK_MAIN) - return "@MAIN"; - if (val == MDBX_PGWALK_GC) - return "@GC"; - if (val == MDBX_PGWALK_META) - return "@META"; - - const unsigned char *const data = val->iov_base; - const size_t len = val->iov_len; - if (data == MDBX_PGWALK_MAIN) - return "@MAIN"; - if (data == MDBX_PGWALK_GC) - return "@GC"; - if (data == MDBX_PGWALK_META) - return "@META"; - - if (!len) - return ""; - if (!data) - return ""; - if (len > 65536) { - static char buf[64]; - /* NOTE: There is MSYS2 MinGW bug if you here got - * the "unknown conversion type character ‘z’ in format [-Werror=format=]" - * https://stackoverflow.com/questions/74504432/whats-the-proper-way-to-tell-mingw-based-gcc-to-use-ansi-stdio-output-on-windo - */ - snprintf(buf, sizeof(buf), "", len); - return buf; - } - - bool printable = true; - bool quoting = false; - size_t xchars = 0; - for (size_t i = 0; i < val->iov_len && printable; ++i) { - quoting |= data[i] != '_' && isalnum(data[i]) == 0; - printable = isprint(data[i]) != 0 || - (data[i] < ' ' && ++xchars < 4 && len > xchars * 4); - } - - size_t need = len + 1; - if (quoting || !printable) - need += len + /* quotes */ 2 + 2 * /* max xchars */ 4; - if (need > printable_buf.iov_len) { - void *ptr = osal_realloc(printable_buf.iov_base, need); - if (!ptr) - return ""; - if (!printable_buf.iov_base) - atexit(free_printable_buf); - printable_buf.iov_base = ptr; - printable_buf.iov_len = need; - } - - char *out = printable_buf.iov_base; - if (!quoting) { - memcpy(out, data, len); - out += len; - } else if (printable) { - *out++ = '\''; - for (size_t i = 0; i < len; ++i) { - if (data[i] < ' ') { - assert((char *)printable_buf.iov_base + printable_buf.iov_len > - out + 4); - static const char hex[] = "0123456789abcdef"; - out[0] = '\\'; - out[1] = 'x'; - out[2] = hex[data[i] >> 4]; - out[3] = hex[data[i] & 15]; - out += 4; - } else if (strchr("\"'`\\", data[i])) { - assert((char *)printable_buf.iov_base + printable_buf.iov_len > - out + 2); - out[0] = '\\'; - out[1] = data[i]; - out += 2; - } else { - assert((char *)printable_buf.iov_base + printable_buf.iov_len > - out + 1); - *out++ = data[i]; - } +#define LINE_SEVERITY_NONE 255 +static bool lf(void) { + if (!line_struct.empty) { + line_count += 1; + line_struct.empty = true; + line_struct.severity = LINE_SEVERITY_NONE; + line_struct.scope_depth = 0; + if (line_output) { + fputc('\n', line_output); + return true; } - *out++ = '\''; } - assert((char *)printable_buf.iov_base + printable_buf.iov_len > out); - *out = 0; - return printable_buf.iov_base; + return false; } -static void va_log(MDBX_log_level_t level, const char *function, int line, - const char *msg, va_list args) { - static const char *const prefixes[] = { - "!!!fatal: ", " ! " /* error */, " ~ " /* warning */, - " " /* notice */, " // " /* verbose */, " //// " /* debug */, - " ////// " /* trace */ +static void flush(void) { fflush(nullptr); } + +static void lf_flush(void) { + if (lf()) + flush(); +} + +static bool silently(enum MDBX_chk_severity severity) { + int cutoff = + chk.scope ? chk.scope->verbosity >> MDBX_chk_severity_prio_shift + : verbose + (MDBX_chk_result >> MDBX_chk_severity_prio_shift); + int prio = (severity >> MDBX_chk_severity_prio_shift); + if (chk.scope && chk.scope->stage == MDBX_chk_traversal_subdbs && verbose < 2) + prio += 1; + return quiet || cutoff < ((prio > 0) ? prio : 0); +} + +static FILE *prefix(enum MDBX_chk_severity severity) { + if (silently(severity)) + return nullptr; + + static const char *const prefixes[16] = { + "!!!fatal: ", // 0 fatal + " ! ", // 1 error + " ~ ", // 2 warning + " ", // 3 notice + "", // 4 result + " = ", // 5 resolution + " - ", // 6 processing + " ", // 7 info + " ", // 8 verbose + " ", // 9 details + " // ", // A lib-verbose + " //// ", // B lib-debug + " ////// ", // C lib-trace + " ////// ", // D lib-extra + " ////// ", // E +1 + " ////// " // F +2 }; - FILE *out = stdout; - if (level <= MDBX_LOG_ERROR) { - total_problems++; - out = stderr; + const bool nl = + line_struct.scope_depth != chk.scope_nesting || + (line_struct.severity != severity && + (line_struct.severity != MDBX_chk_processing || + severity < MDBX_chk_result || severity > MDBX_chk_resolution)); + if (nl) + lf(); + if (severity < MDBX_chk_warning) + flush(); + FILE *out = (severity > MDBX_chk_error) ? stdout : stderr; + if (nl || line_struct.empty) { + line_struct.severity = severity; + line_struct.scope_depth = chk.scope_nesting; + unsigned kind = line_struct.severity & MDBX_chk_severity_kind_mask; + if (line_struct.scope_depth || *prefixes[kind]) { + line_struct.empty = false; + for (size_t i = 0; i < line_struct.scope_depth; ++i) + fputs(" ", out); + fputs(prefixes[kind], out); + } } + return line_output = out; +} - if (!quiet && verbose + 1 >= (unsigned)level && - (unsigned)level < ARRAY_LENGTH(prefixes)) { - fflush(nullptr); - fputs(prefixes[level], out); +static void suffix(size_t cookie, const char *str) { + if (cookie == line_count && !line_struct.empty) { + fprintf(line_output, " %s", str); + line_struct.empty = false; + lf(); + } +} + +static size_t MDBX_PRINTF_ARGS(2, 3) + print(enum MDBX_chk_severity severity, const char *msg, ...) { + FILE *out = prefix(severity); + if (out) { + va_list args; + va_start(args, msg); vfprintf(out, msg, args); - - const bool have_lf = msg[strlen(msg) - 1] == '\n'; - if (level == MDBX_LOG_FATAL && function && line) - fprintf(out, have_lf ? " %s(), %u\n" : " (%s:%u)\n", - function + (strncmp(function, "mdbx_", 5) ? 5 : 0), line); - else if (!have_lf) - fputc('\n', out); - fflush(nullptr); + va_end(args); + line_struct.empty = false; + return line_count; } + return 0; +} +static FILE *MDBX_PRINTF_ARGS(2, 3) + print_ln(enum MDBX_chk_severity severity, const char *msg, ...) { + FILE *out = prefix(severity); + if (out) { + va_list args; + va_start(args, msg); + vfprintf(out, msg, args); + va_end(args); + line_struct.empty = false; + lf(); + } + return out; +} + +static void logger(MDBX_log_level_t level, const char *function, int line, + const char *fmt, va_list args) { + if (level <= MDBX_LOG_ERROR) + mdbx_env_chk_problem(&chk); + + const unsigned kind = (level > MDBX_LOG_NOTICE) + ? level - MDBX_LOG_NOTICE + + (MDBX_chk_extra & MDBX_chk_severity_kind_mask) + : level; + const unsigned prio = kind << MDBX_chk_severity_prio_shift; + enum MDBX_chk_severity severity = prio + kind; + FILE *out = prefix(severity); + if (out) { + vfprintf(out, fmt, args); + const bool have_lf = fmt[strlen(fmt) - 1] == '\n'; + if (level == MDBX_LOG_FATAL && function && line) { + if (have_lf) + for (size_t i = 0; i < line_struct.scope_depth; ++i) + fputs(" ", out); + fprintf(out, have_lf ? " %s(), %u" : " (%s:%u)", + function + (strncmp(function, "mdbx_", 5) ? 0 : 5), line); + lf(); + } else if (have_lf) { + line_struct.empty = true; + line_struct.severity = LINE_SEVERITY_NONE; + line_count += 1; + } else + lf(); + } + if (level < MDBX_LOG_VERBOSE) + flush(); if (level == MDBX_LOG_FATAL) { #if !MDBX_DEBUG && !MDBX_FORCE_ASSERTIONS exit(EXIT_FAILURE_MDBX); @@ -255,767 +232,144 @@ static void va_log(MDBX_log_level_t level, const char *function, int line, } } -static void MDBX_PRINTF_ARGS(1, 2) error(const char *msg, ...) { +static void MDBX_PRINTF_ARGS(1, 2) error_fmt(const char *msg, ...) { va_list args; va_start(args, msg); - va_log(MDBX_LOG_ERROR, nullptr, 0, msg, args); + logger(MDBX_LOG_ERROR, nullptr, 0, msg, args); va_end(args); } -static void logger(MDBX_log_level_t level, const char *function, int line, - const char *msg, va_list args) { - (void)line; - (void)function; - if (level < MDBX_LOG_EXTRA) - va_log(level, function, line, msg, args); +static int error_fn(const char *fn, int err) { + if (err) + error_fmt("%s() failed, error %d, %s", fn, err, mdbx_strerror(err)); + return err; } -static int check_user_break(void) { - switch (user_break) { - case 0: - return MDBX_SUCCESS; - case 1: - print(" - interrupted by signal\n"); - fflush(nullptr); +static bool check_break(MDBX_chk_context_t *ctx) { + (void)ctx; + if (!user_break) + return false; + if (user_break == 1) { + print(MDBX_chk_resolution, "interrupted by signal"); + lf_flush(); user_break = 2; } - return MDBX_EINTR; + return true; } -static void pagemap_cleanup(void) { - osal_free(walk.pagemap); - walk.pagemap = nullptr; -} - -static bool eq(const MDBX_val a, const MDBX_val b) { - return a.iov_len == b.iov_len && - (a.iov_base == b.iov_base || a.iov_len == 0 || - !memcmp(a.iov_base, b.iov_base, a.iov_len)); -} - -static walk_dbi_t *pagemap_lookup_dbi(const MDBX_val *dbi_name, bool silent) { - static walk_dbi_t *last; - - if (dbi_name == MDBX_PGWALK_MAIN) - return &dbi_main; - if (dbi_name == MDBX_PGWALK_GC) - return &dbi_free; - if (dbi_name == MDBX_PGWALK_META) - return &dbi_meta; - - if (last && eq(last->name, *dbi_name)) - return last; - - walk_dbi_t *dbi = walk.dbi + CORE_DBS + /* account pseudo-entry for meta */ 1; - for (; dbi < ARRAY_END(walk.dbi) && dbi->name.iov_base; ++dbi) { - if (eq(dbi->name, *dbi_name)) - return last = dbi; - } - - if (verbose > 0 && !silent) { - print(" - found %s area\n", sdb_name(dbi_name)); - fflush(nullptr); - } - - if (dbi == ARRAY_END(walk.dbi)) - return nullptr; - - dbi->name = *dbi_name; - return last = dbi; -} - -static void MDBX_PRINTF_ARGS(4, 5) - problem_add(const char *object, uint64_t entry_number, const char *msg, - const char *extra, ...) { - total_problems++; - - if (!quiet) { - int need_fflush = 0; - struct problem *p; - - for (p = problems_list; p; p = p->pr_next) - if (p->caption == msg) - break; - - if (!p) { - p = osal_calloc(1, sizeof(*p)); - if (unlikely(!p)) - return; - p->caption = msg; - p->pr_next = problems_list; - problems_list = p; - need_fflush = 1; - } - - p->count++; - if (verbose > 1) { - print(" %s #%" PRIu64 ": %s", object, entry_number, msg); - if (extra) { - va_list args; - printf(" ("); - va_start(args, extra); - vfprintf(stdout, extra, args); - va_end(args); - printf(")"); - } - printf("\n"); - if (need_fflush) - fflush(nullptr); +static int scope_push(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *scope, + MDBX_chk_scope_t *inner, const char *fmt, va_list args) { + (void)scope; + if (fmt && *fmt) { + FILE *out = prefix(MDBX_chk_processing); + if (out) { + vfprintf(out, fmt, args); + inner->usr_o.number = line_count; + line_struct.ctx = ctx; + flush(); } } + return MDBX_SUCCESS; } -static struct problem *problems_push(void) { - struct problem *p = problems_list; - problems_list = nullptr; - return p; -} - -static size_t problems_pop(struct problem *list) { - size_t count = 0; - - if (problems_list) { - int i; - - print(" - problems: "); - for (i = 0; problems_list; ++i) { - struct problem *p = problems_list->pr_next; - count += problems_list->count; - print("%s%s (%" PRIuPTR ")", i ? ", " : "", problems_list->caption, - problems_list->count); - osal_free(problems_list); - problems_list = p; - } - print("\n"); - fflush(nullptr); - } - - problems_list = list; - return count; -} - -static int pgvisitor(const uint64_t pgno, const unsigned pgnumber, - void *const ctx, const int deep, const MDBX_val *dbi_name, - const size_t page_size, const MDBX_page_type_t pagetype, - const MDBX_error_t err, const size_t nentries, - const size_t payload_bytes, const size_t header_bytes, - const size_t unused_bytes) { +static void scope_pop(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *scope, + MDBX_chk_scope_t *inner) { (void)ctx; - const bool is_gc_tree = dbi_name == MDBX_PGWALK_GC; - if (deep > 42) { - problem_add("deep", deep, "too large", nullptr); - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - return MDBX_CORRUPTED /* avoid infinite loop/recursion */; - } - - walk_dbi_t *dbi = pagemap_lookup_dbi(dbi_name, false); - if (!dbi) { - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - return MDBX_ENOMEM; - } - - const size_t page_bytes = payload_bytes + header_bytes + unused_bytes; - walk.pgcount += pgnumber; - - const char *pagetype_caption; - bool branch = false; - switch (pagetype) { - default: - problem_add("page", pgno, "unknown page-type", "type %u, deep %i", - (unsigned)pagetype, deep); - pagetype_caption = "unknown"; - dbi->pages.other += pgnumber; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - break; - case MDBX_page_broken: - pagetype_caption = "broken"; - dbi->pages.other += pgnumber; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - break; - case MDBX_subpage_broken: - pagetype_caption = "broken-subpage"; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - break; - case MDBX_page_meta: - pagetype_caption = "meta"; - dbi->pages.other += pgnumber; - break; - case MDBX_page_large: - pagetype_caption = "large"; - dbi->pages.large_volume += pgnumber; - dbi->pages.large_count += 1; - break; - case MDBX_page_branch: - pagetype_caption = "branch"; - dbi->pages.branch += pgnumber; - branch = true; - break; - case MDBX_page_leaf: - pagetype_caption = "leaf"; - dbi->pages.leaf += pgnumber; - break; - case MDBX_page_dupfixed_leaf: - pagetype_caption = "leaf-dupfixed"; - dbi->pages.leaf_dupfixed += pgnumber; - break; - case MDBX_subpage_leaf: - pagetype_caption = "subleaf-dupsort"; - dbi->pages.subleaf_dupsort += 1; - break; - case MDBX_subpage_dupfixed_leaf: - pagetype_caption = "subleaf-dupfixed"; - dbi->pages.subleaf_dupfixed += 1; - break; - } - - if (pgnumber) { - if (verbose > 3 && (!only_subdb.iov_base || eq(only_subdb, dbi->name))) { - if (pgnumber == 1) - print(" %s-page %" PRIu64, pagetype_caption, pgno); - else - print(" %s-span %" PRIu64 "[%u]", pagetype_caption, pgno, pgnumber); - print(" of %s: header %" PRIiPTR ", %s %" PRIiPTR ", payload %" PRIiPTR - ", unused %" PRIiPTR ", deep %i\n", - sdb_name(&dbi->name), header_bytes, - (pagetype == MDBX_page_branch) ? "keys" : "entries", nentries, - payload_bytes, unused_bytes, deep); - } - - bool already_used = false; - for (unsigned n = 0; n < pgnumber; ++n) { - uint64_t spanpgno = pgno + n; - if (spanpgno >= alloc_pages) { - problem_add("page", spanpgno, "wrong page-no", - "%s-page: %" PRIu64 " > %" PRIu64 ", deep %i", - pagetype_caption, spanpgno, alloc_pages, deep); - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } else if (walk.pagemap[spanpgno]) { - walk_dbi_t *coll_dbi = &walk.dbi[walk.pagemap[spanpgno] - 1]; - problem_add("page", spanpgno, - (branch && coll_dbi == dbi) ? "loop" : "already used", - "%s-page: by %s, deep %i", pagetype_caption, - sdb_name(&coll_dbi->name), deep); - already_used = true; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } else { - walk.pagemap[spanpgno] = (short)(dbi - walk.dbi + 1); - dbi->pages.total += 1; - } - } - - if (already_used) - return branch ? MDBX_RESULT_TRUE /* avoid infinite loop/recursion */ - : MDBX_SUCCESS; - } - - if (MDBX_IS_ERROR(err)) { - problem_add("page", pgno, "invalid/corrupted", "%s-page", pagetype_caption); - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } else { - if (unused_bytes > page_size) { - problem_add("page", pgno, "illegal unused-bytes", - "%s-page: %u < %" PRIuPTR " < %u", pagetype_caption, 0, - unused_bytes, envinfo.mi_dxb_pagesize); - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } - - if (header_bytes < (int)sizeof(long) || - (size_t)header_bytes >= envinfo.mi_dxb_pagesize - sizeof(long)) { - problem_add("page", pgno, "illegal header-length", - "%s-page: %" PRIuPTR " < %" PRIuPTR " < %" PRIuPTR, - pagetype_caption, sizeof(long), header_bytes, - envinfo.mi_dxb_pagesize - sizeof(long)); - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } - if (payload_bytes < 1) { - if (nentries > 1) { - problem_add("page", pgno, "zero size-of-entry", - "%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR " entries", - pagetype_caption, payload_bytes, nentries); - /* if ((size_t)header_bytes + unused_bytes < page_size) { - // LY: hush a misuse error - page_bytes = page_size; - } */ - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } else { - problem_add("page", pgno, "empty", - "%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR - " entries, deep %i", - pagetype_caption, payload_bytes, nentries, deep); - dbi->pages.empty += 1; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } - } - - if (pgnumber) { - if (page_bytes != page_size) { - problem_add("page", pgno, "misused", - "%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR - "h + %" PRIuPTR "p + %" PRIuPTR "u), deep %i", - pagetype_caption, page_size, page_bytes, header_bytes, - payload_bytes, unused_bytes, deep); - if (page_size > page_bytes) - dbi->lost_bytes += page_size - page_bytes; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } else { - dbi->payload_bytes += (uint64_t)payload_bytes + header_bytes; - walk.total_payload_bytes += (uint64_t)payload_bytes + header_bytes; - } - } - } - - return check_user_break(); + (void)scope; + suffix(inner->usr_o.number, inner->subtotal_issues ? "error(s)" : "done"); + flush(); } -typedef int(visitor)(const uint64_t record_number, const MDBX_val *key, - const MDBX_val *data); -static int process_db(MDBX_dbi dbi_handle, const MDBX_val *dbi_name, - visitor *handler); - -static int handle_userdb(const uint64_t record_number, const MDBX_val *key, - const MDBX_val *data) { - (void)record_number; - (void)key; - (void)data; - return check_user_break(); +static MDBX_chk_user_subdb_cookie_t *subdb_filter(MDBX_chk_context_t *ctx, + const MDBX_val *name, + MDBX_db_flags_t flags) { + (void)ctx; + (void)flags; + return (!only_subdb.iov_base || + (only_subdb.iov_len == name->iov_len && + memcmp(only_subdb.iov_base, name->iov_base, name->iov_len) == 0)) + ? (void *)(intptr_t)-1 + : nullptr; } -static int handle_freedb(const uint64_t record_number, const MDBX_val *key, - const MDBX_val *data) { - char *bad = ""; - pgno_t *iptr = data->iov_base; - - if (key->iov_len != sizeof(txnid_t)) - problem_add("entry", record_number, "wrong txn-id size", - "key-size %" PRIiPTR, key->iov_len); - else { - txnid_t txnid; - memcpy(&txnid, key->iov_base, sizeof(txnid)); - if (txnid < 1 || txnid > envinfo.mi_recent_txnid) - problem_add("entry", record_number, "wrong txn-id", "%" PRIaTXN, txnid); - else { - if (data->iov_len < sizeof(pgno_t) || data->iov_len % sizeof(pgno_t)) - problem_add("entry", txnid, "wrong idl size", "%" PRIuPTR, - data->iov_len); - size_t number = (data->iov_len >= sizeof(pgno_t)) ? *iptr++ : 0; - if (number < 1 || number > MDBX_PGL_LIMIT) - problem_add("entry", txnid, "wrong idl length", "%" PRIuPTR, number); - else if ((number + 1) * sizeof(pgno_t) > data->iov_len) { - problem_add("entry", txnid, "trimmed idl", - "%" PRIuSIZE " > %" PRIuSIZE " (corruption)", - (number + 1) * sizeof(pgno_t), data->iov_len); - number = data->iov_len / sizeof(pgno_t) - 1; - } else if (data->iov_len - (number + 1) * sizeof(pgno_t) >= - /* LY: allow gap up to one page. it is ok - * and better than shink-and-retry inside update_gc() */ - envinfo.mi_dxb_pagesize) - problem_add("entry", txnid, "extra idl space", - "%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)", - (number + 1) * sizeof(pgno_t), data->iov_len); - - gc_pages += number; - if (envinfo.mi_latter_reader_txnid > txnid) - reclaimable_pages += number; - - pgno_t prev = MDBX_PNL_ASCENDING ? NUM_METAS - 1 : txn->mt_next_pgno; - pgno_t span = 1; - for (size_t i = 0; i < number; ++i) { - if (check_user_break()) - return MDBX_EINTR; - const pgno_t pgno = iptr[i]; - if (pgno < NUM_METAS) - problem_add("entry", txnid, "wrong idl entry", - "pgno %" PRIaPGNO " < meta-pages %u", pgno, NUM_METAS); - else if (pgno >= backed_pages) - problem_add("entry", txnid, "wrong idl entry", - "pgno %" PRIaPGNO " > backed-pages %" PRIu64, pgno, - backed_pages); - else if (pgno >= alloc_pages) - problem_add("entry", txnid, "wrong idl entry", - "pgno %" PRIaPGNO " > alloc-pages %" PRIu64, pgno, - alloc_pages - 1); - else { - if (MDBX_PNL_DISORDERED(prev, pgno)) { - bad = " [bad sequence]"; - problem_add("entry", txnid, "bad sequence", - "%" PRIaPGNO " %c [%zu].%" PRIaPGNO, prev, - (prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? '>' : '<'), - i, pgno); - } - if (walk.pagemap) { - int idx = walk.pagemap[pgno]; - if (idx == 0) - walk.pagemap[pgno] = -1; - else if (idx > 0) - problem_add("page", pgno, "already used", "by %s", - sdb_name(&walk.dbi[idx - 1].name)); - else - problem_add("page", pgno, "already listed in GC", nullptr); - } - } - prev = pgno; - while (i + span < number && - iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) - : pgno_sub(pgno, span))) - ++span; - } - if (verbose > 3 && !only_subdb.iov_base) { - print(" transaction %" PRIaTXN ", %" PRIuPTR - " pages, maxspan %" PRIaPGNO "%s\n", - txnid, number, span, bad); - if (verbose > 4) { - for (size_t i = 0; i < number; i += span) { - const pgno_t pgno = iptr[i]; - for (span = 1; - i + span < number && - iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) - : pgno_sub(pgno, span)); - ++span) - ; - if (span > 1) { - print(" %9" PRIaPGNO "[%" PRIaPGNO "]\n", pgno, span); - } else - print(" %9" PRIaPGNO "\n", pgno); - } - } - } - } - } - - return check_user_break(); +static int stage_begin(MDBX_chk_context_t *ctx, enum MDBX_chk_stage stage) { + (void)ctx; + chk_stage = stage; + anchor_lineno = line_count; + flush(); + return MDBX_SUCCESS; } -static int equal_or_greater(const MDBX_val *a, const MDBX_val *b) { - return eq(*a, *b) ? 0 : 1; +static int conclude(MDBX_chk_context_t *ctx); +static int stage_end(MDBX_chk_context_t *ctx, enum MDBX_chk_stage stage, + int err) { + if (stage == MDBX_chk_conclude && !err) + err = conclude(ctx); + suffix(anchor_lineno, err ? "error(s)" : "done"); + flush(); + chk_stage = MDBX_chk_none; + return err; } -static int handle_maindb(const uint64_t record_number, const MDBX_val *key, - const MDBX_val *data) { - if (data->iov_len == sizeof(MDBX_db)) { - int rc = process_db(~0u, key, handle_userdb); - if (rc != MDBX_INCOMPATIBLE) { - userdb_count++; - return rc; - } +static MDBX_chk_line_t *print_begin(MDBX_chk_context_t *ctx, + enum MDBX_chk_severity severity) { + (void)ctx; + if (silently(severity)) + return nullptr; + if (line_struct.ctx) { + if (line_struct.severity == MDBX_chk_processing && + severity >= MDBX_chk_result && severity <= MDBX_chk_resolution && + line_output) + fputc(' ', line_output); + else + lf(); + line_struct.ctx = nullptr; } - return handle_userdb(record_number, key, data); + line_struct.severity = severity; + return &line_struct; } -static const char *db_flags2keymode(unsigned flags) { - flags &= (MDBX_REVERSEKEY | MDBX_INTEGERKEY); - switch (flags) { - case 0: - return "usual"; - case MDBX_REVERSEKEY: - return "reserve"; - case MDBX_INTEGERKEY: - return "ordinal"; - case MDBX_REVERSEKEY | MDBX_INTEGERKEY: - return "msgpack"; - default: - assert(false); - __unreachable(); - } +static void print_flush(MDBX_chk_line_t *line) { + (void)line; + flush(); } -static const char *db_flags2valuemode(unsigned flags) { - flags &= (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | MDBX_INTEGERDUP); - switch (flags) { - case 0: - return "single"; - case MDBX_DUPSORT: - return "multi"; - case MDBX_REVERSEDUP: - case MDBX_DUPSORT | MDBX_REVERSEDUP: - return "multi-reverse"; - case MDBX_DUPFIXED: - case MDBX_DUPSORT | MDBX_DUPFIXED: - return "multi-samelength"; - case MDBX_DUPFIXED | MDBX_REVERSEDUP: - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP: - return "multi-reverse-samelength"; - case MDBX_INTEGERDUP: - case MDBX_DUPSORT | MDBX_INTEGERDUP: - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: - case MDBX_DUPFIXED | MDBX_INTEGERDUP: - return "multi-ordinal"; - case MDBX_INTEGERDUP | MDBX_REVERSEDUP: - case MDBX_DUPSORT | MDBX_INTEGERDUP | MDBX_REVERSEDUP: - return "multi-msgpack"; - case MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: - return "reserved"; - default: - assert(false); - __unreachable(); - } +static void print_done(MDBX_chk_line_t *line) { + lf(); + line->ctx = nullptr; } -static int process_db(MDBX_dbi dbi_handle, const MDBX_val *dbi_name, - visitor *handler) { - MDBX_cursor *mc; - MDBX_stat ms; - MDBX_val key, data; - MDBX_val prev_key, prev_data; - unsigned flags; - int rc, i; - struct problem *saved_list; - uint64_t problems_count; - const bool second_pass = dbi_handle == MAIN_DBI; - - uint64_t record_count = 0, dups = 0; - uint64_t key_bytes = 0, data_bytes = 0; - - if ((MDBX_TXN_FINISHED | MDBX_TXN_ERROR) & mdbx_txn_flags(txn)) { - print(" ! abort processing %s due to a previous error\n", - sdb_name(dbi_name)); - return MDBX_BAD_TXN; - } - - if (dbi_handle == ~0u) { - rc = mdbx_dbi_open_ex2( - txn, dbi_name, MDBX_DB_ACCEDE, &dbi_handle, - (dbi_name && ignore_wrong_order) ? equal_or_greater : nullptr, - (dbi_name && ignore_wrong_order) ? equal_or_greater : nullptr); - if (rc) { - if (!dbi_name || - rc != - MDBX_INCOMPATIBLE) /* LY: mainDB's record is not a user's DB. */ { - error("mdbx_dbi_open(%s) failed, error %d %s\n", sdb_name(dbi_name), rc, - mdbx_strerror(rc)); - } - return rc; - } - } - - if (dbi_handle >= CORE_DBS && dbi_name && only_subdb.iov_base && - !eq(only_subdb, *dbi_name)) { - if (verbose) { - print("Skip processing %s...\n", sdb_name(dbi_name)); - fflush(nullptr); - } - skipped_subdb++; - return MDBX_SUCCESS; - } - - if (!second_pass && verbose) - print("Processing %s...\n", sdb_name(dbi_name)); - fflush(nullptr); - - rc = mdbx_dbi_flags(txn, dbi_handle, &flags); - if (rc) { - error("mdbx_dbi_flags() failed, error %d %s\n", rc, mdbx_strerror(rc)); - return rc; - } - - rc = mdbx_dbi_stat(txn, dbi_handle, &ms, sizeof(ms)); - if (rc) { - error("mdbx_dbi_stat() failed, error %d %s\n", rc, mdbx_strerror(rc)); - return rc; - } - - if (!second_pass && verbose) { - print(" - key-value kind: %s-key => %s-value", db_flags2keymode(flags), - db_flags2valuemode(flags)); - if (verbose > 1) { - print(", flags:"); - if (!flags) - print(" none"); - else { - for (i = 0; dbflags[i].bit; i++) - if (flags & dbflags[i].bit) - print(" %s", dbflags[i].name); - } - if (verbose > 2) - print(" (0x%02X), dbi-id %d", flags, dbi_handle); - } - print("\n"); - if (ms.ms_mod_txnid) - print(" - last modification txn#%" PRIu64 "\n", ms.ms_mod_txnid); - if (verbose > 1) { - print(" - page size %u, entries %" PRIu64 "\n", ms.ms_psize, - ms.ms_entries); - print(" - b-tree depth %u, pages: branch %" PRIu64 ", leaf %" PRIu64 - ", overflow %" PRIu64 "\n", - ms.ms_depth, ms.ms_branch_pages, ms.ms_leaf_pages, - ms.ms_overflow_pages); - } - } - - walk_dbi_t *dbi = (dbi_handle < CORE_DBS) - ? &walk.dbi[dbi_handle] - : pagemap_lookup_dbi(dbi_name, true); - if (!dbi) { - error("too many DBIs or out of memory\n"); - return MDBX_ENOMEM; - } - if (!dont_traversal) { - const uint64_t subtotal_pages = - ms.ms_branch_pages + ms.ms_leaf_pages + ms.ms_overflow_pages; - if (subtotal_pages != dbi->pages.total) - error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", - "subtotal", subtotal_pages, dbi->pages.total); - if (ms.ms_branch_pages != dbi->pages.branch) - error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", "branch", - ms.ms_branch_pages, dbi->pages.branch); - const uint64_t allleaf_pages = dbi->pages.leaf + dbi->pages.leaf_dupfixed; - if (ms.ms_leaf_pages != allleaf_pages) - error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", - "all-leaf", ms.ms_leaf_pages, allleaf_pages); - if (ms.ms_overflow_pages != dbi->pages.large_volume) - error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", - "large/overlow", ms.ms_overflow_pages, dbi->pages.large_volume); - } - rc = mdbx_cursor_open(txn, dbi_handle, &mc); - if (rc) { - error("mdbx_cursor_open() failed, error %d %s\n", rc, mdbx_strerror(rc)); - return rc; - } - - if (ignore_wrong_order) { /* for debugging with enabled assertions */ - mc->mc_checking |= CC_SKIPORD; - if (mc->mc_xcursor) - mc->mc_xcursor->mx_cursor.mc_checking |= CC_SKIPORD; - } - - const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, flags); - saved_list = problems_push(); - prev_key.iov_base = nullptr; - prev_key.iov_len = 0; - prev_data.iov_base = nullptr; - prev_data.iov_len = 0; - rc = mdbx_cursor_get(mc, &key, &data, MDBX_FIRST); - while (rc == MDBX_SUCCESS) { - rc = check_user_break(); - if (rc) - goto bailout; - - if (!second_pass) { - bool bad_key = false; - if (key.iov_len > maxkeysize) { - problem_add("entry", record_count, "key length exceeds max-key-size", - "%" PRIuPTR " > %" PRIuPTR, key.iov_len, maxkeysize); - bad_key = true; - } else if ((flags & MDBX_INTEGERKEY) && key.iov_len != sizeof(uint64_t) && - key.iov_len != sizeof(uint32_t)) { - problem_add("entry", record_count, "wrong key length", - "%" PRIuPTR " != 4or8", key.iov_len); - bad_key = true; - } - - bool bad_data = false; - if ((flags & MDBX_INTEGERDUP) && data.iov_len != sizeof(uint64_t) && - data.iov_len != sizeof(uint32_t)) { - problem_add("entry", record_count, "wrong data length", - "%" PRIuPTR " != 4or8", data.iov_len); - bad_data = true; - } - - if (prev_key.iov_base) { - if (prev_data.iov_base && !bad_data && (flags & MDBX_DUPFIXED) && - prev_data.iov_len != data.iov_len) { - problem_add("entry", record_count, "different data length", - "%" PRIuPTR " != %" PRIuPTR, prev_data.iov_len, - data.iov_len); - bad_data = true; - } - - if (!bad_key) { - int cmp = mdbx_cmp(txn, dbi_handle, &key, &prev_key); - if (cmp == 0) { - ++dups; - if ((flags & MDBX_DUPSORT) == 0) { - problem_add("entry", record_count, "duplicated entries", nullptr); - if (prev_data.iov_base && data.iov_len == prev_data.iov_len && - memcmp(data.iov_base, prev_data.iov_base, data.iov_len) == - 0) { - problem_add("entry", record_count, "complete duplicate", - nullptr); - } - } else if (!bad_data && prev_data.iov_base) { - cmp = mdbx_dcmp(txn, dbi_handle, &data, &prev_data); - if (cmp == 0) { - problem_add("entry", record_count, "complete duplicate", - nullptr); - } else if (cmp < 0 && !ignore_wrong_order) { - problem_add("entry", record_count, - "wrong order of multi-values", nullptr); - } - } - } else if (cmp < 0 && !ignore_wrong_order) { - problem_add("entry", record_count, "wrong order of entries", - nullptr); - } - } - } - - if (!bad_key) { - if (verbose && (flags & MDBX_INTEGERKEY) && !prev_key.iov_base) - print(" - fixed key-size %" PRIuPTR "\n", key.iov_len); - prev_key = key; - } - if (!bad_data) { - if (verbose && (flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) && - !prev_data.iov_base) - print(" - fixed data-size %" PRIuPTR "\n", data.iov_len); - prev_data = data; - } - } - - if (handler) { - rc = handler(record_count, &key, &data); - if (MDBX_IS_ERROR(rc)) - goto bailout; - } - - record_count++; - key_bytes += key.iov_len; - data_bytes += data.iov_len; - - rc = mdbx_cursor_get(mc, &key, &data, MDBX_NEXT); - } - if (rc != MDBX_NOTFOUND) - error("mdbx_cursor_get() failed, error %d %s\n", rc, mdbx_strerror(rc)); - else - rc = 0; - - if (record_count != ms.ms_entries) - problem_add("entry", record_count, "different number of entries", - "%" PRIu64 " != %" PRIu64, record_count, ms.ms_entries); -bailout: - problems_count = problems_pop(saved_list); - if (!second_pass && verbose) { - print(" - summary: %" PRIu64 " records, %" PRIu64 " dups, %" PRIu64 - " key's bytes, %" PRIu64 " data's " - "bytes, %" PRIu64 " problems\n", - record_count, dups, key_bytes, data_bytes, problems_count); - fflush(nullptr); - } - - mdbx_cursor_close(mc); - return (rc || problems_count) ? MDBX_RESULT_TRUE : MDBX_SUCCESS; +static void print_chars(MDBX_chk_line_t *line, const char *str, size_t len) { + if (line->empty) + prefix(line->severity); + fwrite(str, 1, len, line_output); } +static void print_format(MDBX_chk_line_t *line, const char *fmt, va_list args) { + if (line->empty) + prefix(line->severity); + vfprintf(line_output, fmt, args); +} + +static const MDBX_chk_callbacks_t cb = {.check_break = check_break, + .scope_push = scope_push, + .scope_pop = scope_pop, + .subdb_filter = subdb_filter, + .stage_begin = stage_begin, + .stage_end = stage_end, + .print_begin = print_begin, + .print_flush = print_flush, + .print_done = print_done, + .print_chars = print_chars, + .print_format = print_format}; + static void usage(char *prog) { fprintf( stderr, "usage: %s " "[-V] [-v] [-q] [-c] [-0|1|2] [-w] [-d] [-i] [-s subdb] [-u|U] dbpath\n" " -V\t\tprint version and exit\n" - " -v\t\tmore verbose, could be used multiple times\n" + " -v\t\tmore verbose, could be repeated upto 9 times\n" " -q\t\tbe quiet\n" " -c\t\tforce cooperative mode (don't try exclusive)\n" " -w\t\twrite-mode checking\n" @@ -1031,144 +385,68 @@ static void usage(char *prog) { exit(EXIT_INTERRUPTED); } -static bool meta_ot(txnid_t txn_a, uint64_t sign_a, txnid_t txn_b, - uint64_t sign_b, const bool wanna_steady) { - if (txn_a == txn_b) - return SIGN_IS_STEADY(sign_b); - - if (wanna_steady && SIGN_IS_STEADY(sign_a) != SIGN_IS_STEADY(sign_b)) - return SIGN_IS_STEADY(sign_b); - - return txn_a < txn_b; -} - -static bool meta_eq(txnid_t txn_a, uint64_t sign_a, txnid_t txn_b, - uint64_t sign_b) { - if (!txn_a || txn_a != txn_b) - return false; - - if (SIGN_IS_STEADY(sign_a) != SIGN_IS_STEADY(sign_b)) - return false; - - return true; -} - -static int meta_recent(const bool wanna_steady) { - if (meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, wanna_steady)) - return meta_ot(envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, - envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, wanna_steady) - ? 1 - : 2; - else - return meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, wanna_steady) - ? 2 - : 0; -} - -static int meta_tail(int head) { - switch (head) { - case 0: - return meta_ot(envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, - envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, true) - ? 1 - : 2; - case 1: - return meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, true) - ? 0 - : 2; - case 2: - return meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, true) - ? 0 - : 1; - default: - assert(false); - return -1; +static int conclude(MDBX_chk_context_t *ctx) { + int err = MDBX_SUCCESS; + if (ctx->result.total_problems == 1 && ctx->result.problems_meta == 1 && + (chk_flags & + (MDBX_CHK_SKIP_BTREE_TRAVERSAL | MDBX_CHK_SKIP_KV_TRAVERSAL)) == 0 && + (env_flags & MDBX_RDONLY) == 0 && !only_subdb.iov_base && + stuck_meta < 0 && ctx->result.steady_txnid < ctx->result.recent_txnid) { + const size_t step_lineno = + print(MDBX_chk_resolution, + "Perform sync-to-disk for make steady checkpoint" + " at txn-id #%" PRIi64 "...", + ctx->result.recent_txnid); + flush(); + err = error_fn("mdbx_env_pgwalk", mdbx_env_sync_ex(ctx->env, true, false)); + if (err == MDBX_SUCCESS) { + ctx->result.problems_meta -= 1; + ctx->result.total_problems -= 1; + suffix(step_lineno, "done"); + } } -} -static int meta_head(void) { return meta_recent(false); } - -void verbose_meta(int num, txnid_t txnid, uint64_t sign, uint64_t bootid_x, - uint64_t bootid_y) { - const bool have_bootid = (bootid_x | bootid_y) != 0; - const bool bootid_match = bootid_x == envinfo.mi_bootid.current.x && - bootid_y == envinfo.mi_bootid.current.y; - - print(" - meta-%d: ", num); - switch (sign) { - case MDBX_DATASIGN_NONE: - print("no-sync/legacy"); - break; - case MDBX_DATASIGN_WEAK: - print("weak-%s", bootid_match ? (have_bootid ? "intact (same boot-id)" - : "unknown (no boot-id") - : "dead"); - break; - default: - print("steady"); - break; + if (turn_meta && stuck_meta >= 0 && + (chk_flags & + (MDBX_CHK_SKIP_BTREE_TRAVERSAL | MDBX_CHK_SKIP_KV_TRAVERSAL)) == 0 && + !only_subdb.iov_base && + (env_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == MDBX_EXCLUSIVE) { + const bool successful_check = + (err | ctx->result.total_problems | ctx->result.problems_meta) == 0; + if (successful_check || force_turn_meta) { + const size_t step_lineno = print( + MDBX_chk_resolution, + "Performing turn to the specified meta-page (%d) due to %s!", + stuck_meta, + successful_check ? "successful check" : "the -T option was given"); + flush(); + err = mdbx_env_turn_for_recovery(ctx->env, stuck_meta); + if (err != MDBX_SUCCESS) + error_fn("mdbx_env_turn_for_recovery", err); + else + suffix(step_lineno, "done"); + } else { + print(MDBX_chk_resolution, + "Skipping turn to the specified meta-page (%d) due to " + "unsuccessful check!", + stuck_meta); + lf_flush(); + } } - print(" txn#%" PRIu64, txnid); - const int head = meta_head(); - if (num == head) - print(", head"); - else if (num == meta_tail(head)) - print(", tail"); - else - print(", stay"); - - if (stuck_meta >= 0) { - if (num == stuck_meta) - print(", forced for checking"); - } else if (txnid > envinfo.mi_recent_txnid && - (envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE) - print(", rolled-back %" PRIu64 " (%" PRIu64 " >>> %" PRIu64 ")", - txnid - envinfo.mi_recent_txnid, txnid, envinfo.mi_recent_txnid); - print("\n"); -} - -static uint64_t get_meta_txnid(const unsigned meta_id) { - switch (meta_id) { - default: - assert(false); - error("unexpected meta_id %u\n", meta_id); - return 0; - case 0: - return envinfo.mi_meta0_txnid; - case 1: - return envinfo.mi_meta1_txnid; - case 2: - return envinfo.mi_meta2_txnid; - } -} - -static void print_size(const char *prefix, const uint64_t value, - const char *suffix) { - const char sf[] = - "KMGTPEZY"; /* LY: Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta! */ - double k = 1024.0; - size_t i; - for (i = 0; sf[i + 1] && value / k > 1000.0; ++i) - k *= 1024; - print("%s%" PRIu64 " (%.2f %cb)%s", prefix, value, value / k, sf[i], suffix); + return err; } int main(int argc, char *argv[]) { int rc; char *prog = argv[0]; char *envname; - unsigned problems_maindb = 0, problems_freedb = 0, problems_meta = 0; - bool write_locked = false; - bool turn_meta = false; - bool force_turn_meta = false; bool warmup = false; MDBX_warmup_flags_t warmup_flags = MDBX_warmup_default; + if (argc < 2) + usage(prog); + double elapsed; #if defined(_WIN32) || defined(_WIN64) uint64_t timestamp_start, timestamp_finish; @@ -1176,20 +454,11 @@ int main(int argc, char *argv[]) { #else struct timespec timestamp_start, timestamp_finish; if (clock_gettime(CLOCK_MONOTONIC, ×tamp_start)) { - rc = errno; - error("clock_gettime() failed, error %d %s\n", rc, mdbx_strerror(rc)); + error_fn("clock_gettime", errno); return EXIT_FAILURE_SYS; } #endif - dbi_meta.name.iov_base = MDBX_PGWALK_META; - dbi_free.name.iov_base = MDBX_PGWALK_GC; - dbi_main.name.iov_base = MDBX_PGWALK_MAIN; - atexit(pagemap_cleanup); - - if (argc < 2) - usage(prog); - for (int i; (i = getopt(argc, argv, "uU" "0" @@ -1222,7 +491,10 @@ int main(int argc, char *argv[]) { mdbx_build.options); return EXIT_SUCCESS; case 'v': - verbose++; + if (verbose >= 9 && 0) + usage(prog); + else + verbose += 1; break; case '0': stuck_meta = 0; @@ -1239,8 +511,6 @@ int main(int argc, char *argv[]) { case 'T': turn_meta = force_turn_meta = true; quiet = false; - if (verbose < 2) - verbose = 2; break; case 'q': quiet = true; @@ -1248,27 +518,30 @@ int main(int argc, char *argv[]) { case 'n': break; case 'w': - envflags &= ~MDBX_RDONLY; + env_flags &= ~MDBX_RDONLY; + chk_flags |= MDBX_CHK_READWRITE; #if MDBX_MMAP_INCOHERENT_FILE_WRITE /* Temporary `workaround` for OpenBSD kernel's flaw. * See https://libmdbx.dqdkfa.ru/dead-github/issues/67 */ - envflags |= MDBX_WRITEMAP; + env_flags |= MDBX_WRITEMAP; #endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ break; case 'c': - envflags = (envflags & ~MDBX_EXCLUSIVE) | MDBX_ACCEDE; + env_flags = (env_flags & ~MDBX_EXCLUSIVE) | MDBX_ACCEDE; break; case 'd': - dont_traversal = true; + chk_flags |= MDBX_CHK_SKIP_BTREE_TRAVERSAL; break; case 's': if (only_subdb.iov_base && strcmp(only_subdb.iov_base, optarg)) usage(prog); - only_subdb.iov_base = optarg; - only_subdb.iov_len = strlen(optarg); + else { + only_subdb.iov_base = optarg; + only_subdb.iov_len = strlen(optarg); + } break; case 'i': - ignore_wrong_order = true; + chk_flags |= MDBX_CHK_IGNORE_ORDER; break; case 'u': warmup = true; @@ -1287,26 +560,29 @@ int main(int argc, char *argv[]) { usage(prog); rc = MDBX_SUCCESS; - if (stuck_meta >= 0 && (envflags & MDBX_EXCLUSIVE) == 0) { - error("exclusive mode is required to using specific meta-page(%d) for " - "checking.\n", - stuck_meta); + if (stuck_meta >= 0 && (env_flags & MDBX_EXCLUSIVE) == 0) { + error_fmt("exclusive mode is required to using specific meta-page(%d) for " + "checking.", + stuck_meta); rc = EXIT_INTERRUPTED; } if (turn_meta) { if (stuck_meta < 0) { - error("meta-page must be specified (by -0, -1 or -2 options) to turn to " - "it.\n"); + error_fmt( + "meta-page must be specified (by -0, -1 or -2 options) to turn to " + "it."); rc = EXIT_INTERRUPTED; } - if (envflags & MDBX_RDONLY) { - error("write-mode must be enabled to turn to the specified meta-page.\n"); + if (env_flags & MDBX_RDONLY) { + error_fmt( + "write-mode must be enabled to turn to the specified meta-page."); rc = EXIT_INTERRUPTED; } - if (only_subdb.iov_base || dont_traversal) { - error( + if (only_subdb.iov_base || (chk_flags & (MDBX_CHK_SKIP_BTREE_TRAVERSAL | + MDBX_CHK_SKIP_KV_TRAVERSAL))) { + error_fmt( "whole database checking with b-tree traversal are required to turn " - "to the specified meta-page.\n"); + "to the specified meta-page."); rc = EXIT_INTERRUPTED; } } @@ -1327,13 +603,14 @@ int main(int argc, char *argv[]) { #endif /* !WINDOWS */ envname = argv[optind]; - print("mdbx_chk %s (%s, T-%s)\nRunning for %s in 'read-%s' mode...\n", + print(MDBX_chk_result, + "mdbx_chk %s (%s, T-%s)\nRunning for %s in 'read-%s' mode...", mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname, - (envflags & MDBX_RDONLY) ? "only" : "write"); - fflush(nullptr); - mdbx_setup_debug((verbose < MDBX_LOG_TRACE - 1) - ? (MDBX_log_level_t)(verbose + 1) + (env_flags & MDBX_RDONLY) ? "only" : "write"); + lf_flush(); + mdbx_setup_debug((verbose + MDBX_LOG_WARN < MDBX_LOG_TRACE) + ? (MDBX_log_level_t)(verbose + MDBX_LOG_WARN) : MDBX_LOG_TRACE, MDBX_DBG_DUMP | MDBX_DBG_ASSERT | MDBX_DBG_AUDIT | MDBX_DBG_LEGACY_OVERLAP | MDBX_DBG_DONT_UPGRADE, @@ -1341,22 +618,22 @@ int main(int argc, char *argv[]) { rc = mdbx_env_create(&env); if (rc) { - error("mdbx_env_create() failed, error %d %s\n", rc, mdbx_strerror(rc)); + error_fn("mdbx_env_create", rc); return rc < 0 ? EXIT_FAILURE_MDBX : EXIT_FAILURE_SYS; } - rc = mdbx_env_set_maxdbs(env, MDBX_MAX_DBI); + rc = mdbx_env_set_maxdbs(env, CORE_DBS); if (rc) { - error("mdbx_env_set_maxdbs() failed, error %d %s\n", rc, mdbx_strerror(rc)); + error_fn("mdbx_env_set_maxdbs", rc); goto bailout; } if (stuck_meta >= 0) { rc = mdbx_env_open_for_recovery(env, envname, stuck_meta, - (envflags & MDBX_RDONLY) ? false : true); + (env_flags & MDBX_RDONLY) ? false : true); } else { - rc = mdbx_env_open(env, envname, envflags, 0); - if ((envflags & MDBX_EXCLUSIVE) && + rc = mdbx_env_open(env, envname, env_flags, 0); + if ((env_flags & MDBX_EXCLUSIVE) && (rc == MDBX_BUSY || #if defined(_WIN32) || defined(_WIN64) rc == ERROR_LOCK_VIOLATION || rc == ERROR_SHARING_VIOLATION @@ -1364,489 +641,51 @@ int main(int argc, char *argv[]) { rc == EBUSY || rc == EAGAIN #endif )) { - envflags &= ~MDBX_EXCLUSIVE; - rc = mdbx_env_open(env, envname, envflags | MDBX_ACCEDE, 0); + env_flags &= ~MDBX_EXCLUSIVE; + rc = mdbx_env_open(env, envname, env_flags | MDBX_ACCEDE, 0); } } if (rc) { - error("mdbx_env_open() failed, error %d %s\n", rc, mdbx_strerror(rc)); - if (rc == MDBX_WANNA_RECOVERY && (envflags & MDBX_RDONLY)) - print("Please run %s in the read-write mode (with '-w' option).\n", prog); + error_fn("mdbx_env_open", rc); + if (rc == MDBX_WANNA_RECOVERY && (env_flags & MDBX_RDONLY)) + print_ln(MDBX_chk_result, + "Please run %s in the read-write mode (with '-w' option).", + prog); goto bailout; } - if (verbose) - print(" - %s mode\n", - (envflags & MDBX_EXCLUSIVE) ? "monopolistic" : "cooperative"); - - if ((envflags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0) { - if (verbose) { - print(" - taking write lock..."); - fflush(nullptr); - } - rc = mdbx_txn_lock(env, false); - if (rc != MDBX_SUCCESS) { - error("mdbx_txn_lock() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - if (verbose) - print(" done\n"); - write_locked = true; - } + print_ln(MDBX_chk_verbose, "%s mode", + (env_flags & MDBX_EXCLUSIVE) ? "monopolistic" : "cooperative"); if (warmup) { - if (verbose) { - print(" - warming up..."); - fflush(nullptr); - } + anchor_lineno = print(MDBX_chk_verbose, "warming up..."); + flush(); rc = mdbx_env_warmup(env, nullptr, warmup_flags, 3600 * 65536); if (MDBX_IS_ERROR(rc)) { - error("mdbx_env_warmup(flags %u) failed, error %d %s\n", warmup_flags, rc, - mdbx_strerror(rc)); + error_fn("mdbx_env_warmup", rc); goto bailout; } - if (verbose) - print(" %s\n", rc ? "timeout" : "done"); + suffix(anchor_lineno, rc ? "timeout" : "done"); } - rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn); + rc = mdbx_env_chk(env, &cb, &chk, chk_flags, + MDBX_chk_result + (verbose << MDBX_chk_severity_prio_shift), + 0); if (rc) { - error("mdbx_txn_begin() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - - rc = mdbx_env_info_ex(env, txn, &envinfo, sizeof(envinfo)); - if (rc) { - error("mdbx_env_info_ex() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - if (verbose) { - print(" - current boot-id "); - if (envinfo.mi_bootid.current.x | envinfo.mi_bootid.current.y) - print("%016" PRIx64 "-%016" PRIx64 "\n", envinfo.mi_bootid.current.x, - envinfo.mi_bootid.current.y); - else - print("unavailable\n"); - } - - mdbx_filehandle_t dxb_fd; - rc = mdbx_env_get_fd(env, &dxb_fd); - if (rc) { - error("mdbx_env_get_fd() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - - uint64_t dxb_filesize = 0; -#if defined(_WIN32) || defined(_WIN64) - { - BY_HANDLE_FILE_INFORMATION info; - if (!GetFileInformationByHandle(dxb_fd, &info)) - rc = GetLastError(); - else - dxb_filesize = info.nFileSizeLow | (uint64_t)info.nFileSizeHigh << 32; - } -#else - { - struct stat st; - STATIC_ASSERT_MSG(sizeof(off_t) <= sizeof(uint64_t), - "libmdbx requires 64-bit file I/O on 64-bit systems"); - if (fstat(dxb_fd, &st)) - rc = errno; - else - dxb_filesize = st.st_size; - } -#endif - if (rc) { - error("osal_filesize() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - - errno = 0; - const uint64_t dxbfile_pages = dxb_filesize / envinfo.mi_dxb_pagesize; - alloc_pages = txn->mt_next_pgno; - backed_pages = envinfo.mi_geo.current / envinfo.mi_dxb_pagesize; - if (backed_pages > dxbfile_pages) { - print(" ! backed-pages %" PRIu64 " > file-pages %" PRIu64 "\n", - backed_pages, dxbfile_pages); - ++problems_meta; - } - if (dxbfile_pages < NUM_METAS) - print(" ! file-pages %" PRIu64 " < %u\n", dxbfile_pages, NUM_METAS); - if (backed_pages < NUM_METAS) - print(" ! backed-pages %" PRIu64 " < %u\n", backed_pages, NUM_METAS); - if (backed_pages < NUM_METAS || dxbfile_pages < NUM_METAS) - goto bailout; - if (backed_pages > MAX_PAGENO + 1) { - print(" ! backed-pages %" PRIu64 " > max-pages %" PRIaPGNO "\n", - backed_pages, MAX_PAGENO + 1); - ++problems_meta; - backed_pages = MAX_PAGENO + 1; - } - - if ((envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) { - if (backed_pages > dxbfile_pages) { - print(" ! backed-pages %" PRIu64 " > file-pages %" PRIu64 "\n", - backed_pages, dxbfile_pages); - ++problems_meta; - backed_pages = dxbfile_pages; - } - if (alloc_pages > backed_pages) { - print(" ! alloc-pages %" PRIu64 " > backed-pages %" PRIu64 "\n", - alloc_pages, backed_pages); - ++problems_meta; - alloc_pages = backed_pages; - } - } else { - /* LY: DB may be shrunk by writer down to the allocated pages. */ - if (alloc_pages > backed_pages) { - print(" ! alloc-pages %" PRIu64 " > backed-pages %" PRIu64 "\n", - alloc_pages, backed_pages); - ++problems_meta; - alloc_pages = backed_pages; - } - if (alloc_pages > dxbfile_pages) { - print(" ! alloc-pages %" PRIu64 " > file-pages %" PRIu64 "\n", - alloc_pages, dxbfile_pages); - ++problems_meta; - alloc_pages = dxbfile_pages; - } - if (backed_pages > dxbfile_pages) - backed_pages = dxbfile_pages; - } - - if (verbose) { - print(" - pagesize %u (%u system), max keysize %d..%d" - ", max readers %u\n", - envinfo.mi_dxb_pagesize, envinfo.mi_sys_pagesize, - mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT), - mdbx_env_get_maxkeysize_ex(env, 0), envinfo.mi_maxreaders); - print_size(" - mapsize ", envinfo.mi_mapsize, "\n"); - if (envinfo.mi_geo.lower == envinfo.mi_geo.upper) - print_size(" - fixed datafile: ", envinfo.mi_geo.current, ""); - else { - print_size(" - dynamic datafile: ", envinfo.mi_geo.lower, ""); - print_size(" .. ", envinfo.mi_geo.upper, ", "); - print_size("+", envinfo.mi_geo.grow, ", "); - print_size("-", envinfo.mi_geo.shrink, "\n"); - print_size(" - current datafile: ", envinfo.mi_geo.current, ""); - } - printf(", %" PRIu64 " pages\n", - envinfo.mi_geo.current / envinfo.mi_dxb_pagesize); -#if defined(_WIN32) || defined(_WIN64) - if (envinfo.mi_geo.shrink && envinfo.mi_geo.current != envinfo.mi_geo.upper) - print( - " WARNING: Due Windows system limitations a " - "file couldn't\n be truncated while the database " - "is opened. So, the size\n database file " - "of may by large than the database itself,\n " - "until it will be closed or reopened in read-write mode.\n"); -#endif - verbose_meta(0, envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_bootid.meta0.x, envinfo.mi_bootid.meta0.y); - verbose_meta(1, envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, - envinfo.mi_bootid.meta1.x, envinfo.mi_bootid.meta1.y); - verbose_meta(2, envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, - envinfo.mi_bootid.meta2.x, envinfo.mi_bootid.meta2.y); - } - - if (stuck_meta >= 0) { - if (verbose) { - print(" - skip checking meta-pages since the %u" - " is selected for verification\n", - stuck_meta); - print(" - transactions: recent %" PRIu64 - ", selected for verification %" PRIu64 ", lag %" PRIi64 "\n", - envinfo.mi_recent_txnid, get_meta_txnid(stuck_meta), - envinfo.mi_recent_txnid - get_meta_txnid(stuck_meta)); - } - } else { - if (verbose > 1) - print(" - performs check for meta-pages clashes\n"); - if (meta_eq(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign)) { - print(" ! meta-%d and meta-%d are clashed\n", 0, 1); - ++problems_meta; - } - if (meta_eq(envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, - envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign)) { - print(" ! meta-%d and meta-%d are clashed\n", 1, 2); - ++problems_meta; - } - if (meta_eq(envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, - envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign)) { - print(" ! meta-%d and meta-%d are clashed\n", 2, 0); - ++problems_meta; - } - - const unsigned steady_meta_id = meta_recent(true); - const uint64_t steady_meta_txnid = get_meta_txnid(steady_meta_id); - const unsigned weak_meta_id = meta_recent(false); - const uint64_t weak_meta_txnid = get_meta_txnid(weak_meta_id); - if (envflags & MDBX_EXCLUSIVE) { - if (verbose > 1) - print(" - performs full check recent-txn-id with meta-pages\n"); - if (steady_meta_txnid != envinfo.mi_recent_txnid) { - print(" ! steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64 - " != %" PRIi64 ")\n", - steady_meta_id, steady_meta_txnid, envinfo.mi_recent_txnid); - ++problems_meta; - } - } else if (write_locked) { - if (verbose > 1) - print(" - performs lite check recent-txn-id with meta-pages (not a " - "monopolistic mode)\n"); - if (weak_meta_txnid != envinfo.mi_recent_txnid) { - print(" ! weak meta-%d txn-id mismatch recent-txn-id (%" PRIi64 - " != %" PRIi64 ")\n", - weak_meta_id, weak_meta_txnid, envinfo.mi_recent_txnid); - ++problems_meta; - } - } else if (verbose) { - print(" - skip check recent-txn-id with meta-pages (monopolistic or " - "read-write mode only)\n"); - } - total_problems += problems_meta; - - if (verbose) - print(" - transactions: recent %" PRIu64 ", latter reader %" PRIu64 - ", lag %" PRIi64 "\n", - envinfo.mi_recent_txnid, envinfo.mi_latter_reader_txnid, - envinfo.mi_recent_txnid - envinfo.mi_latter_reader_txnid); - } - - if (!dont_traversal) { - struct problem *saved_list; - size_t traversal_problems; - uint64_t empty_pages, lost_bytes; - - print("Traversal b-tree by txn#%" PRIaTXN "...\n", txn->mt_txnid); - fflush(nullptr); - walk.pagemap = osal_calloc((size_t)backed_pages, sizeof(*walk.pagemap)); - if (!walk.pagemap) { - rc = errno ? errno : MDBX_ENOMEM; - error("calloc() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - - saved_list = problems_push(); - rc = mdbx_env_pgwalk(txn, pgvisitor, nullptr, - true /* always skip key ordering checking to avoid - MDBX_CORRUPTED when using custom comparators */); - traversal_problems = problems_pop(saved_list); - - if (rc) { - if (rc != MDBX_EINTR || !check_user_break()) - error("mdbx_env_pgwalk() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - - for (uint64_t n = 0; n < alloc_pages; ++n) - if (!walk.pagemap[n]) - unused_pages += 1; - - empty_pages = lost_bytes = 0; - for (walk_dbi_t *dbi = &dbi_main; - dbi < ARRAY_END(walk.dbi) && dbi->name.iov_base; ++dbi) { - empty_pages += dbi->pages.empty; - lost_bytes += dbi->lost_bytes; - } - - if (verbose) { - uint64_t total_page_bytes = walk.pgcount * envinfo.mi_dxb_pagesize; - print(" - pages: walked %" PRIu64 ", left/unused %" PRIu64 "\n", - walk.pgcount, unused_pages); - if (verbose > 1) { - for (walk_dbi_t *dbi = walk.dbi; - dbi < ARRAY_END(walk.dbi) && dbi->name.iov_base; ++dbi) { - print(" %s: subtotal %" PRIu64, sdb_name(&dbi->name), - dbi->pages.total); - if (dbi->pages.other && dbi->pages.other != dbi->pages.total) - print(", other %" PRIu64, dbi->pages.other); - if (dbi->pages.branch) - print(", branch %" PRIu64, dbi->pages.branch); - if (dbi->pages.large_count) - print(", large %" PRIu64, dbi->pages.large_count); - uint64_t all_leaf = dbi->pages.leaf + dbi->pages.leaf_dupfixed; - if (all_leaf) { - print(", leaf %" PRIu64, all_leaf); - if (verbose > 2 && - (dbi->pages.subleaf_dupsort | dbi->pages.leaf_dupfixed | - dbi->pages.subleaf_dupfixed)) - print(" (usual %" PRIu64 ", sub-dupsort %" PRIu64 - ", dupfixed %" PRIu64 ", sub-dupfixed %" PRIu64 ")", - dbi->pages.leaf, dbi->pages.subleaf_dupsort, - dbi->pages.leaf_dupfixed, dbi->pages.subleaf_dupfixed); - } - print("\n"); - } - } - - if (verbose > 1) - print(" - usage: total %" PRIu64 " bytes, payload %" PRIu64 - " (%.1f%%), unused " - "%" PRIu64 " (%.1f%%)\n", - total_page_bytes, walk.total_payload_bytes, - walk.total_payload_bytes * 100.0 / total_page_bytes, - total_page_bytes - walk.total_payload_bytes, - (total_page_bytes - walk.total_payload_bytes) * 100.0 / - total_page_bytes); - if (verbose > 2) { - for (walk_dbi_t *dbi = walk.dbi; - dbi < ARRAY_END(walk.dbi) && dbi->name.iov_base; ++dbi) - if (dbi->pages.total) { - uint64_t dbi_bytes = dbi->pages.total * envinfo.mi_dxb_pagesize; - print(" %s: subtotal %" PRIu64 " bytes (%.1f%%)," - " payload %" PRIu64 " (%.1f%%), unused %" PRIu64 " (%.1f%%)", - sdb_name(&dbi->name), dbi_bytes, - dbi_bytes * 100.0 / total_page_bytes, dbi->payload_bytes, - dbi->payload_bytes * 100.0 / dbi_bytes, - dbi_bytes - dbi->payload_bytes, - (dbi_bytes - dbi->payload_bytes) * 100.0 / dbi_bytes); - if (dbi->pages.empty) - print(", %" PRIu64 " empty pages", dbi->pages.empty); - if (dbi->lost_bytes) - print(", %" PRIu64 " bytes lost", dbi->lost_bytes); - print("\n"); - } else - print(" %s: empty\n", sdb_name(&dbi->name)); - } - print(" - summary: average fill %.1f%%", - walk.total_payload_bytes * 100.0 / total_page_bytes); - if (empty_pages) - print(", %" PRIu64 " empty pages", empty_pages); - if (lost_bytes) - print(", %" PRIu64 " bytes lost", lost_bytes); - print(", %" PRIuPTR " problems\n", traversal_problems); - } - } else if (verbose) { - print("Skipping b-tree walk...\n"); - fflush(nullptr); - } - - if (gc_tree_problems) { - print("Skip processing %s since %s is corrupted (%u problems)\n", "@GC", - "b-tree", gc_tree_problems); - problems_freedb = gc_tree_problems; - } else - problems_freedb = process_db(FREE_DBI, MDBX_PGWALK_GC, handle_freedb); - - if (verbose) { - uint64_t value = envinfo.mi_mapsize / envinfo.mi_dxb_pagesize; - double percent = value / 100.0; - print(" - space: %" PRIu64 " total pages", value); - print(", backed %" PRIu64 " (%.1f%%)", backed_pages, - backed_pages / percent); - print(", allocated %" PRIu64 " (%.1f%%)", alloc_pages, - alloc_pages / percent); - - if (verbose > 1) { - value = envinfo.mi_mapsize / envinfo.mi_dxb_pagesize - alloc_pages; - print(", remained %" PRIu64 " (%.1f%%)", value, value / percent); - - value = dont_traversal ? alloc_pages - gc_pages : walk.pgcount; - print(", used %" PRIu64 " (%.1f%%)", value, value / percent); - - print(", gc %" PRIu64 " (%.1f%%)", gc_pages, gc_pages / percent); - - value = gc_pages - reclaimable_pages; - print(", detained %" PRIu64 " (%.1f%%)", value, value / percent); - - print(", reclaimable %" PRIu64 " (%.1f%%)", reclaimable_pages, - reclaimable_pages / percent); - } - - value = envinfo.mi_mapsize / envinfo.mi_dxb_pagesize - alloc_pages + - reclaimable_pages; - print(", available %" PRIu64 " (%.1f%%)\n", value, value / percent); - } - - if ((problems_maindb = data_tree_problems) == 0 && problems_freedb == 0) { - if (!dont_traversal && - (envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) { - if (walk.pgcount != alloc_pages - gc_pages) { - error("used pages mismatch (%" PRIu64 "(walked) != %" PRIu64 - "(allocated - GC))\n", - walk.pgcount, alloc_pages - gc_pages); - } - if (unused_pages != gc_pages) { - error("GC pages mismatch (%" PRIu64 "(expected) != %" PRIu64 "(GC))\n", - unused_pages, gc_pages); - } - } else if (verbose) { - print(" - skip check used and GC pages (btree-traversal with " - "monopolistic or read-write mode only)\n"); - } - - problems_maindb = process_db(~0u, /* MAIN_DBI */ nullptr, nullptr); - if (problems_maindb == 0) { - print("Scanning %s for %s...\n", "@MAIN", "sub-database(s)"); - if (!process_db(MAIN_DBI, nullptr, handle_maindb)) { - if (!userdb_count && verbose) - print(" - does not contain multiple databases\n"); - } - } else { - print("Skip processing %s since %s is corrupted (%u problems)\n", - "sub-database(s)", "@MAIN", problems_maindb); - } - } else { - print("Skip processing %s since %s is corrupted (%u problems)\n", "@MAIN", - "b-tree", data_tree_problems); - } - - if (rc == 0 && total_problems == 1 && problems_meta == 1 && !dont_traversal && - (envflags & MDBX_RDONLY) == 0 && !only_subdb.iov_base && stuck_meta < 0 && - get_meta_txnid(meta_recent(true)) < envinfo.mi_recent_txnid) { - print("Perform sync-to-disk for make steady checkpoint at txn-id #%" PRIi64 - "\n", - envinfo.mi_recent_txnid); - fflush(nullptr); - if (write_locked) { - mdbx_txn_unlock(env); - write_locked = false; - } - rc = mdbx_env_sync_ex(env, true, false); - if (rc != MDBX_SUCCESS) - error("mdbx_env_pgwalk() failed, error %d %s\n", rc, mdbx_strerror(rc)); - else { - total_problems -= 1; - problems_meta -= 1; - } - } - - if (turn_meta && stuck_meta >= 0 && !dont_traversal && !only_subdb.iov_base && - (envflags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == MDBX_EXCLUSIVE) { - const bool successful_check = (rc | total_problems | problems_meta) == 0; - if (successful_check || force_turn_meta) { - fflush(nullptr); - print(" = Performing turn to the specified meta-page (%d) due to %s!\n", - stuck_meta, - successful_check ? "successful check" : "the -T option was given"); - fflush(nullptr); - rc = mdbx_env_turn_for_recovery(env, stuck_meta); - if (rc != MDBX_SUCCESS) - error("mdbx_env_turn_for_recovery() failed, error %d %s\n", rc, - mdbx_strerror(rc)); - } else { - print(" = Skipping turn to the specified meta-page (%d) due to " - "unsuccessful check!\n", - stuck_meta); - } + if (chk.result.total_problems == 0) + error_fn("mdbx_env_chk", rc); + else if (rc != MDBX_EINTR && rc != MDBX_RESULT_TRUE && !user_break) + rc = 0; } bailout: - if (txn) - mdbx_txn_abort(txn); - if (write_locked) { - mdbx_txn_unlock(env); - write_locked = false; - } if (env) { - const bool dont_sync = rc != 0 || total_problems; + const bool dont_sync = rc != 0 || chk.result.total_problems; mdbx_env_close_ex(env, dont_sync); } - fflush(nullptr); + flush(); if (rc) { - if (rc < 0) + if (rc > 0) return user_break ? EXIT_INTERRUPTED : EXIT_FAILURE_SYS; return EXIT_FAILURE_MDBX; } @@ -1856,21 +695,24 @@ bailout: elapsed = (timestamp_finish - timestamp_start) * 1e-3; #else if (clock_gettime(CLOCK_MONOTONIC, ×tamp_finish)) { - rc = errno; - error("clock_gettime() failed, error %d %s\n", rc, mdbx_strerror(rc)); + error_fn("clock_gettime", errno); return EXIT_FAILURE_SYS; } elapsed = timestamp_finish.tv_sec - timestamp_start.tv_sec + (timestamp_finish.tv_nsec - timestamp_start.tv_nsec) * 1e-9; #endif /* !WINDOWS */ - if (total_problems) { - print("Total %u error%s detected, elapsed %.3f seconds.\n", total_problems, - (total_problems > 1) ? "s are" : " is", elapsed); - if (problems_meta || problems_maindb || problems_freedb) + if (chk.result.total_problems) { + print_ln(MDBX_chk_result, + "Total %" PRIuSIZE " error%s detected, elapsed %.3f seconds.", + chk.result.total_problems, + (chk.result.total_problems > 1) ? "s are" : " is", elapsed); + if (chk.result.problems_meta || chk.result.problems_kv || + chk.result.problems_gc) return EXIT_FAILURE_CHECK_MAJOR; return EXIT_FAILURE_CHECK_MINOR; } - print("No error is detected, elapsed %.3f seconds\n", elapsed); + print_ln(MDBX_chk_result, "No error is detected, elapsed %.3f seconds.", + elapsed); return EXIT_SUCCESS; }