/// \copyright SPDX-License-Identifier: Apache-2.0 /// \author Леонид Юрьев aka Leonid Yuriev \date 2015-2024 #include "internals.h" typedef struct MDBX_chk_internal { MDBX_chk_context_t *usr; const struct MDBX_chk_callbacks *cb; uint64_t monotime_timeout; size_t *problem_counter; uint8_t flags; bool got_break; bool write_locked; uint8_t scope_depth; MDBX_chk_table_t table_gc, table_main; int16_t *pagemap; MDBX_chk_table_t *last_lookup; const void *last_nested; MDBX_chk_scope_t scope_stack[12]; MDBX_chk_table_t *table[MDBX_MAX_DBI + CORE_DBS]; MDBX_envinfo envinfo; troika_t troika; MDBX_val v2a_buf; } MDBX_chk_internal_t; __cold static int chk_check_break(MDBX_chk_scope_t *const scope) { MDBX_chk_internal_t *const chk = scope->internal; return (chk->got_break || (chk->cb->check_break && (chk->got_break = chk->cb->check_break(chk->usr)))) ? MDBX_RESULT_TRUE : MDBX_RESULT_FALSE; } __cold static void chk_line_end(MDBX_chk_line_t *line) { if (likely(line)) { MDBX_chk_internal_t *chk = line->ctx->internal; assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); if (likely(chk->cb->print_done)) chk->cb->print_done(line); } } __cold __must_check_result static MDBX_chk_line_t * chk_line_begin(MDBX_chk_scope_t *const scope, enum MDBX_chk_severity severity) { MDBX_chk_internal_t *const chk = scope->internal; if (severity < MDBX_chk_warning) mdbx_env_chk_encount_problem(chk->usr); MDBX_chk_line_t *line = nullptr; if (likely(chk->cb->print_begin)) { line = chk->cb->print_begin(chk->usr, severity); if (likely(line)) { assert(line->ctx == nullptr || (line->ctx == chk->usr && line->empty)); assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); line->ctx = chk->usr; } } return line; } __cold static MDBX_chk_line_t *chk_line_feed(MDBX_chk_line_t *line) { if (likely(line)) { MDBX_chk_internal_t *chk = line->ctx->internal; enum MDBX_chk_severity severity = line->severity; chk_line_end(line); line = chk_line_begin(chk->usr->scope, severity); } return line; } __cold static MDBX_chk_line_t *chk_flush(MDBX_chk_line_t *line) { if (likely(line)) { MDBX_chk_internal_t *chk = line->ctx->internal; assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); if (likely(chk->cb->print_flush)) { chk->cb->print_flush(line); assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); line->out = line->begin; } } return line; } __cold static size_t chk_print_wanna(MDBX_chk_line_t *line, size_t need) { if (likely(line && need)) { size_t have = line->end - line->out; assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); if (need > have) { line = chk_flush(line); have = line->end - line->out; } return (need < have) ? need : have; } return 0; } __cold static MDBX_chk_line_t *chk_puts(MDBX_chk_line_t *line, const char *str) { if (likely(line && str && *str)) { MDBX_chk_internal_t *chk = line->ctx->internal; size_t left = strlen(str); assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); if (chk->cb->print_chars) { chk->cb->print_chars(line, str, left); assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); } else do { size_t chunk = chk_print_wanna(line, left); assert(chunk <= left); if (unlikely(!chunk)) break; memcpy(line->out, str, chunk); line->out += chunk; assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); str += chunk; left -= chunk; } while (left); line->empty = false; } return line; } __cold static MDBX_chk_line_t *chk_print_va(MDBX_chk_line_t *line, const char *fmt, va_list args) { if (likely(line)) { MDBX_chk_internal_t *chk = line->ctx->internal; assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); if (chk->cb->print_format) { chk->cb->print_format(line, fmt, args); assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); } else { va_list ones; va_copy(ones, args); const int needed = vsnprintf(nullptr, 0, fmt, ones); va_end(ones); if (likely(needed > 0)) { const size_t have = chk_print_wanna(line, needed); if (likely(have > 0)) { int written = vsnprintf(line->out, have, fmt, args); if (likely(written > 0)) line->out += written; assert(line->begin <= line->end && line->begin <= line->out && line->out <= line->end); } } } line->empty = false; } return line; } __cold static MDBX_chk_line_t *MDBX_PRINTF_ARGS(2, 3) chk_print(MDBX_chk_line_t *line, const char *fmt, ...) { if (likely(line)) { // MDBX_chk_internal_t *chk = line->ctx->internal; va_list args; va_start(args, fmt); line = chk_print_va(line, fmt, args); va_end(args); line->empty = false; } return line; } __cold static MDBX_chk_line_t *chk_print_size(MDBX_chk_line_t *line, const char *prefix, const uint64_t value, const char *suffix) { static const char sf[] = "KMGTPEZY"; /* LY: Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta! */ if (likely(line)) { MDBX_chk_internal_t *chk = line->ctx->internal; prefix = prefix ? prefix : ""; suffix = suffix ? suffix : ""; if (chk->cb->print_size) chk->cb->print_size(line, prefix, value, suffix); else for (unsigned i = 0;; ++i) { const unsigned scale = 10 + i * 10; const uint64_t rounded = value + (UINT64_C(5) << (scale - 10)); const uint64_t integer = rounded >> scale; const uint64_t fractional = (rounded - (integer << scale)) * 100u >> scale; if ((rounded >> scale) <= 1000) return chk_print(line, "%s%" PRIu64 " (%u.%02u %ciB)%s", prefix, value, (unsigned)integer, (unsigned)fractional, sf[i], suffix); } line->empty = false; } return line; } __cold static int chk_error_rc(MDBX_chk_scope_t *const scope, int err, const char *subj) { MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error); if (line) chk_line_end(chk_flush(chk_print(line, "%s() failed, error %s (%d)", subj, mdbx_strerror(err), err))); else debug_log(MDBX_LOG_ERROR, "mdbx_env_chk", 0, "%s() failed, error %s (%d)", subj, mdbx_strerror(err), err); return err; } __cold static void MDBX_PRINTF_ARGS(5, 6) chk_object_issue(MDBX_chk_scope_t *const scope, const char *object, uint64_t entry_number, const char *caption, const char *extra_fmt, ...) { MDBX_chk_internal_t *const chk = scope->internal; MDBX_chk_issue_t *issue = chk->usr->scope->issues; while (issue) { if (issue->caption == caption) { issue->count += 1; break; } else issue = issue->next; } const bool fresh = issue == nullptr; if (fresh) { issue = osal_malloc(sizeof(*issue)); if (likely(issue)) { issue->caption = caption; issue->count = 1; issue->next = chk->usr->scope->issues; chk->usr->scope->issues = issue; } else chk_error_rc(scope, ENOMEM, "adding issue"); } va_list args; va_start(args, extra_fmt); if (chk->cb->issue) { mdbx_env_chk_encount_problem(chk->usr); chk->cb->issue(chk->usr, object, entry_number, caption, extra_fmt, args); } else { MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error); if (entry_number != UINT64_MAX) chk_print(line, "%s #%" PRIu64 ": %s", object, entry_number, caption); else chk_print(line, "%s: %s", object, caption); if (extra_fmt) chk_puts(chk_print_va(chk_puts(line, " ("), extra_fmt, args), ")"); chk_line_end(fresh ? chk_flush(line) : line); } va_end(args); } __cold static void MDBX_PRINTF_ARGS(2, 3) chk_scope_issue(MDBX_chk_scope_t *const scope, const char *fmt, ...) { MDBX_chk_internal_t *const chk = scope->internal; va_list args; va_start(args, fmt); if (likely(chk->cb->issue)) { mdbx_env_chk_encount_problem(chk->usr); chk->cb->issue(chk->usr, nullptr, 0, nullptr, fmt, args); } else chk_line_end( chk_print_va(chk_line_begin(scope, MDBX_chk_error), fmt, args)); va_end(args); } __cold static int chk_scope_end(MDBX_chk_internal_t *chk, int err) { assert(chk->scope_depth > 0); MDBX_chk_scope_t *const inner = chk->scope_stack + chk->scope_depth; MDBX_chk_scope_t *const outer = chk->scope_depth ? inner - 1 : nullptr; if (!outer || outer->stage != inner->stage) { if (err == MDBX_SUCCESS && *chk->problem_counter) err = MDBX_PROBLEM; else if (*chk->problem_counter == 0 && MDBX_IS_ERROR(err)) *chk->problem_counter = 1; if (chk->problem_counter != &chk->usr->result.total_problems) { chk->usr->result.total_problems += *chk->problem_counter; chk->problem_counter = &chk->usr->result.total_problems; } if (chk->cb->stage_end) err = chk->cb->stage_end(chk->usr, inner->stage, err); } if (chk->cb->scope_conclude) err = chk->cb->scope_conclude(chk->usr, outer, inner, err); chk->usr->scope = outer; chk->usr->scope_nesting = chk->scope_depth -= 1; if (outer) outer->subtotal_issues += inner->subtotal_issues; if (chk->cb->scope_pop) chk->cb->scope_pop(chk->usr, outer, inner); while (inner->issues) { MDBX_chk_issue_t *next = inner->issues->next; osal_free(inner->issues); inner->issues = next; } memset(inner, -1, sizeof(*inner)); return err; } __cold static int chk_scope_begin_args(MDBX_chk_internal_t *chk, int verbosity_adjustment, enum MDBX_chk_stage stage, const void *object, size_t *problems, const char *fmt, va_list args) { if (unlikely(chk->scope_depth + 1u >= ARRAY_LENGTH(chk->scope_stack))) return MDBX_BACKLOG_DEPLETED; MDBX_chk_scope_t *const outer = chk->scope_stack + chk->scope_depth; const int verbosity = outer->verbosity + (verbosity_adjustment - 1) * (1 << MDBX_chk_severity_prio_shift); MDBX_chk_scope_t *const inner = outer + 1; memset(inner, 0, sizeof(*inner)); inner->internal = outer->internal; inner->stage = stage ? stage : (stage = outer->stage); inner->object = object; inner->verbosity = (verbosity < MDBX_chk_warning) ? MDBX_chk_warning : (enum MDBX_chk_severity)verbosity; if (problems) chk->problem_counter = problems; else if (!chk->problem_counter || outer->stage != stage) chk->problem_counter = &chk->usr->result.total_problems; if (chk->cb->scope_push) { const int err = chk->cb->scope_push(chk->usr, outer, inner, fmt, args); if (unlikely(err != MDBX_SUCCESS)) return err; } chk->usr->scope = inner; chk->usr->scope_nesting = chk->scope_depth += 1; if (stage != outer->stage && chk->cb->stage_begin) { int err = chk->cb->stage_begin(chk->usr, stage); if (unlikely(err != MDBX_SUCCESS)) { err = chk_scope_end(chk, err); assert(err != MDBX_SUCCESS); return err ? err : MDBX_RESULT_TRUE; } } return MDBX_SUCCESS; } __cold static int MDBX_PRINTF_ARGS(6, 7) chk_scope_begin(MDBX_chk_internal_t *chk, int verbosity_adjustment, enum MDBX_chk_stage stage, const void *object, size_t *problems, const char *fmt, ...) { va_list args; va_start(args, fmt); int rc = chk_scope_begin_args(chk, verbosity_adjustment, stage, object, problems, fmt, args); va_end(args); return rc; } __cold static int chk_scope_restore(MDBX_chk_scope_t *const target, int err) { MDBX_chk_internal_t *const chk = target->internal; assert(target <= chk->usr->scope); while (chk->usr->scope > target) err = chk_scope_end(chk, err); return err; } __cold void chk_scope_pop(MDBX_chk_scope_t *const inner) { if (inner && inner > inner->internal->scope_stack) chk_scope_restore(inner - 1, MDBX_SUCCESS); } __cold static MDBX_chk_scope_t *MDBX_PRINTF_ARGS(3, 4) chk_scope_push(MDBX_chk_scope_t *const scope, int verbosity_adjustment, const char *fmt, ...) { chk_scope_restore(scope, MDBX_SUCCESS); va_list args; va_start(args, fmt); int err = chk_scope_begin_args(scope->internal, verbosity_adjustment, scope->stage, nullptr, nullptr, fmt, args); va_end(args); return err ? nullptr : scope + 1; } __cold static const char *chk_v2a(MDBX_chk_internal_t *chk, const MDBX_val *val) { if (val == MDBX_CHK_MAIN) return "@MAIN"; if (val == MDBX_CHK_GC) return "@GC"; if (val == MDBX_CHK_META) return "@META"; const unsigned char *const data = val->iov_base; const size_t len = val->iov_len; if (data == MDBX_CHK_MAIN) return "@MAIN"; if (data == MDBX_CHK_GC) return "@GC"; if (data == MDBX_CHK_META) return "@META"; if (!len) return ""; if (!data) return ""; if (len > 65536) { const size_t enough = 42; if (chk->v2a_buf.iov_len < enough) { void *ptr = osal_realloc(chk->v2a_buf.iov_base, enough); if (unlikely(!ptr)) return ""; chk->v2a_buf.iov_base = ptr; chk->v2a_buf.iov_len = enough; } snprintf(chk->v2a_buf.iov_base, chk->v2a_buf.iov_len, "", len); return chk->v2a_buf.iov_base; } bool printable = true; bool quoting = false; size_t xchars = 0; for (size_t i = 0; i < len && printable; ++i) { quoting = quoting || !(data[i] == '_' || isalnum(data[i])); printable = isprint(data[i]) || (data[i] < ' ' && ++xchars < 4 && len > xchars * 4); } size_t need = len + 1; if (quoting || !printable) need += len + /* quotes */ 2 + 2 * /* max xchars */ 4; if (need > chk->v2a_buf.iov_len) { void *ptr = osal_realloc(chk->v2a_buf.iov_base, need); if (unlikely(!ptr)) return ""; chk->v2a_buf.iov_base = ptr; chk->v2a_buf.iov_len = need; } static const char hex[] = "0123456789abcdef"; char *w = chk->v2a_buf.iov_base; if (!quoting) { memcpy(w, data, len); w += len; } else if (printable) { *w++ = '\''; for (size_t i = 0; i < len; ++i) { if (data[i] < ' ') { assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 4); w[0] = '\\'; w[1] = 'x'; w[2] = hex[data[i] >> 4]; w[3] = hex[data[i] & 15]; w += 4; } else if (strchr("\"'`\\", data[i])) { assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 2); w[0] = '\\'; w[1] = data[i]; w += 2; } else { assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 1); *w++ = data[i]; } } *w++ = '\''; } else { *w++ = '\\'; *w++ = 'x'; for (size_t i = 0; i < len; ++i) { assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 2); w[0] = hex[data[i] >> 4]; w[1] = hex[data[i] & 15]; w += 2; } } assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w); *w = 0; return chk->v2a_buf.iov_base; } __cold static void chk_dispose(MDBX_chk_internal_t *chk) { assert(chk->table[FREE_DBI] == &chk->table_gc); assert(chk->table[MAIN_DBI] == &chk->table_main); for (size_t i = 0; i < ARRAY_LENGTH(chk->table); ++i) { MDBX_chk_table_t *const tbl = chk->table[i]; if (tbl) { chk->table[i] = nullptr; if (chk->cb->table_dispose && tbl->cookie) { chk->cb->table_dispose(chk->usr, tbl); tbl->cookie = nullptr; } if (tbl != &chk->table_gc && tbl != &chk->table_main) { osal_free(tbl); } } } osal_free(chk->v2a_buf.iov_base); osal_free(chk->pagemap); chk->usr->internal = nullptr; chk->usr->scope = nullptr; chk->pagemap = nullptr; memset(chk, 0xDD, sizeof(*chk)); osal_free(chk); } static size_t div_8s(size_t numerator, size_t divider) { assert(numerator <= (SIZE_MAX >> 8)); return (numerator << 8) / divider; } static size_t mul_8s(size_t quotient, size_t multiplier) { size_t hi = multiplier * (quotient >> 8); size_t lo = multiplier * (quotient & 255) + 128; return hi + (lo >> 8); } static void histogram_reduce(struct MDBX_chk_histogram *p) { const size_t size = ARRAY_LENGTH(p->ranges), last = size - 1; // ищем пару для слияния с минимальной ошибкой size_t min_err = SIZE_MAX, min_i = last - 1; for (size_t i = 0; i < last; ++i) { const size_t b1 = p->ranges[i].begin, e1 = p->ranges[i].end, s1 = p->ranges[i].amount; const size_t b2 = p->ranges[i + 1].begin, e2 = p->ranges[i + 1].end, s2 = p->ranges[i + 1].amount; const size_t l1 = e1 - b1, l2 = e2 - b2, lx = e2 - b1, sx = s1 + s2; assert(s1 > 0 && b1 > 0 && b1 < e1); assert(s2 > 0 && b2 > 0 && b2 < e2); assert(e1 <= b2); // за ошибку принимаем площадь изменений на гистограмме при слиянии const size_t h1 = div_8s(s1, l1), h2 = div_8s(s2, l2), hx = div_8s(sx, lx); const size_t d1 = mul_8s((h1 > hx) ? h1 - hx : hx - h1, l1); const size_t d2 = mul_8s((h2 > hx) ? h2 - hx : hx - h2, l2); const size_t dx = mul_8s(hx, b2 - e1); const size_t err = d1 + d2 + dx; if (min_err >= err) { min_i = i; min_err = err; } } // объединяем p->ranges[min_i].end = p->ranges[min_i + 1].end; p->ranges[min_i].amount += p->ranges[min_i + 1].amount; p->ranges[min_i].count += p->ranges[min_i + 1].count; if (min_i < last) // перемещаем хвост memmove(p->ranges + min_i, p->ranges + min_i + 1, (last - min_i) * sizeof(p->ranges[0])); // обнуляем последний элемент и продолжаем p->ranges[last].count = 0; } static void histogram_acc(const size_t n, struct MDBX_chk_histogram *p) { STATIC_ASSERT(ARRAY_LENGTH(p->ranges) > 2); p->amount += n; p->count += 1; if (likely(n < 2)) { p->ones += n; p->pad += 1; } else for (;;) { const size_t size = ARRAY_LENGTH(p->ranges), last = size - 1; size_t i = 0; while (i < size && p->ranges[i].count && n >= p->ranges[i].begin) { if (n < p->ranges[i].end) { // значение попадает в существующий интервал p->ranges[i].amount += n; p->ranges[i].count += 1; return; } ++i; } if (p->ranges[last].count == 0) { // использованы еще не все слоты, добавляем интервал assert(i < size); if (p->ranges[i].count) { // раздвигаем assert(i < last); #ifdef __COVERITY__ if (i < last) /* avoid Coverity false-positive issue */ #endif /* __COVERITY__ */ memmove(p->ranges + i + 1, p->ranges + i, (last - i) * sizeof(p->ranges[0])); } p->ranges[i].begin = n; p->ranges[i].end = n + 1; p->ranges[i].amount = n; p->ranges[i].count = 1; return; } histogram_reduce(p); } } __cold static MDBX_chk_line_t * histogram_dist(MDBX_chk_line_t *line, const struct MDBX_chk_histogram *histogram, const char *prefix, const char *first, bool amount) { line = chk_print(line, "%s:", prefix); const char *comma = ""; const size_t first_val = amount ? histogram->ones : histogram->pad; if (first_val) { chk_print(line, " %s=%" PRIuSIZE, first, first_val); comma = ","; } for (size_t n = 0; n < ARRAY_LENGTH(histogram->ranges); ++n) if (histogram->ranges[n].count) { chk_print(line, "%s %" PRIuSIZE, comma, histogram->ranges[n].begin); if (histogram->ranges[n].begin != histogram->ranges[n].end - 1) chk_print(line, "-%" PRIuSIZE, histogram->ranges[n].end - 1); line = chk_print(line, "=%" PRIuSIZE, amount ? histogram->ranges[n].amount : histogram->ranges[n].count); comma = ","; } return line; } __cold static MDBX_chk_line_t * histogram_print(MDBX_chk_scope_t *scope, MDBX_chk_line_t *line, const struct MDBX_chk_histogram *histogram, const char *prefix, const char *first, bool amount) { if (histogram->count) { line = chk_print(line, "%s %" PRIuSIZE, prefix, amount ? histogram->amount : histogram->count); if (scope->verbosity > MDBX_chk_info) line = chk_puts( histogram_dist(line, histogram, " (distribution", first, amount), ")"); } return line; } //----------------------------------------------------------------------------- __cold static int chk_get_tbl(MDBX_chk_scope_t *const scope, const walk_tbl_t *in, MDBX_chk_table_t **out) { MDBX_chk_internal_t *const chk = scope->internal; if (chk->last_lookup && chk->last_lookup->name.iov_base == in->name.iov_base) { *out = chk->last_lookup; return MDBX_SUCCESS; } for (size_t i = 0; i < ARRAY_LENGTH(chk->table); ++i) { MDBX_chk_table_t *tbl = chk->table[i]; if (!tbl) { tbl = osal_calloc(1, sizeof(MDBX_chk_table_t)); if (unlikely(!tbl)) { *out = nullptr; return chk_error_rc(scope, MDBX_ENOMEM, "alloc_table"); } chk->table[i] = tbl; tbl->flags = in->internal->flags; tbl->id = -1; tbl->name = in->name; } if (tbl->name.iov_base == in->name.iov_base) { if (tbl->id < 0) { tbl->id = (int)i; tbl->cookie = chk->cb->table_filter ? chk->cb->table_filter(chk->usr, &tbl->name, tbl->flags) : (void *)(intptr_t)-1; } *out = (chk->last_lookup = tbl); return MDBX_SUCCESS; } } chk_scope_issue(scope, "too many tables > %u", (unsigned)ARRAY_LENGTH(chk->table) - CORE_DBS - /* meta */ 1); *out = nullptr; return MDBX_PROBLEM; } //------------------------------------------------------------------------------ __cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, const unsigned num) { MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_verbose); MDBX_chk_internal_t *const chk = scope->internal; if (line) { MDBX_env *const env = chk->usr->env; const bool have_bootid = (chk->envinfo.mi_bootid.current.x | chk->envinfo.mi_bootid.current.y) != 0; const bool bootid_match = have_bootid && memcmp(&chk->envinfo.mi_bootid.meta[num], &chk->envinfo.mi_bootid.current, sizeof(chk->envinfo.mi_bootid.current)) == 0; const char *status = "stay"; if (num == chk->troika.recent) status = "head"; else if (num == TROIKA_TAIL(&chk->troika)) status = "tail"; line = chk_print(line, "meta-%u: %s, ", num, status); switch (chk->envinfo.mi_meta_sign[num]) { case DATASIGN_NONE: line = chk_puts(line, "no-sync/legacy"); break; case DATASIGN_WEAK: line = chk_print(line, "weak-%s", have_bootid ? (bootid_match ? "intact (same boot-id)" : "dead") : "unknown (no boot-id)"); break; default: line = chk_puts(line, "steady"); break; } const txnid_t meta_txnid = chk->envinfo.mi_meta_txnid[num]; line = chk_print(line, " txn#%" PRIaTXN ", ", meta_txnid); if (chk->envinfo.mi_bootid.meta[num].x | chk->envinfo.mi_bootid.meta[num].y) line = chk_print(line, "boot-id %" PRIx64 "-%" PRIx64 " (%s)", chk->envinfo.mi_bootid.meta[num].x, chk->envinfo.mi_bootid.meta[num].y, bootid_match ? "live" : "not match"); else line = chk_puts(line, "no boot-id"); if (env->stuck_meta >= 0) { if (num == (unsigned)env->stuck_meta) line = chk_print(line, ", %s", "forced for checking"); } else if (meta_txnid > chk->envinfo.mi_recent_txnid && (env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE) line = chk_print(line, ", rolled-back %" PRIu64 " commit(s) (%" PRIu64 " >>> %" PRIu64 ")", meta_txnid - chk->envinfo.mi_recent_txnid, meta_txnid, chk->envinfo.mi_recent_txnid); chk_line_end(line); } } __cold static int chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, const int deep, const walk_tbl_t *tbl_info, const size_t page_size, const page_type_t pagetype, const MDBX_error_t page_err, const size_t nentries, const size_t payload_bytes, const size_t header_bytes, const size_t unused_bytes) { MDBX_chk_scope_t *const scope = ctx; MDBX_chk_internal_t *const chk = scope->internal; MDBX_chk_context_t *const usr = chk->usr; MDBX_env *const env = usr->env; MDBX_chk_table_t *tbl; int err = chk_get_tbl(scope, tbl_info, &tbl); if (unlikely(err)) return err; if (deep > 42) { chk_scope_issue(scope, "too deeply %u", deep); return MDBX_CORRUPTED /* avoid infinite loop/recursion */; } histogram_acc(deep, &tbl->histogram.deep); usr->result.processed_pages += npages; const size_t page_bytes = payload_bytes + header_bytes + unused_bytes; int height = deep + 1; if (tbl->id >= CORE_DBS) height -= usr->txn->dbs[MAIN_DBI].height; const tree_t *nested = tbl_info->nested; if (nested) { if (tbl->flags & MDBX_DUPSORT) height -= tbl_info->internal->height; else { chk_object_issue(scope, "nested tree", pgno, "unexpected", "table %s flags 0x%x, deep %i", chk_v2a(chk, &tbl->name), tbl->flags, deep); nested = nullptr; } } else chk->last_nested = nullptr; const char *pagetype_caption; bool branch = false; switch (pagetype) { default: chk_object_issue(scope, "page", pgno, "unknown page-type", "type %u, deep %i", (unsigned)pagetype, deep); pagetype_caption = "unknown"; tbl->pages.other += npages; break; case page_broken: assert(page_err != MDBX_SUCCESS); pagetype_caption = "broken"; tbl->pages.other += npages; break; case page_sub_broken: assert(page_err != MDBX_SUCCESS); pagetype_caption = "broken-subpage"; tbl->pages.other += npages; break; case page_large: pagetype_caption = "large"; histogram_acc(npages, &tbl->histogram.large_pages); if (tbl->flags & MDBX_DUPSORT) chk_object_issue(scope, "page", pgno, "unexpected", "type %u, table %s flags 0x%x, deep %i", (unsigned)pagetype, chk_v2a(chk, &tbl->name), tbl->flags, deep); break; case page_branch: branch = true; if (!nested) { pagetype_caption = "branch"; tbl->pages.branch += 1; } else { pagetype_caption = "nested-branch"; tbl->pages.nested_branch += 1; } break; case page_dupfix_leaf: if (!nested) chk_object_issue(scope, "page", pgno, "unexpected", "type %u, table %s flags 0x%x, deep %i", (unsigned)pagetype, chk_v2a(chk, &tbl->name), tbl->flags, deep); /* fall through */ __fallthrough; case page_leaf: if (!nested) { pagetype_caption = "leaf"; tbl->pages.leaf += 1; if (height != tbl_info->internal->height) chk_object_issue(scope, "page", pgno, "wrong tree height", "actual %i != %i table %s", height, tbl_info->internal->height, chk_v2a(chk, &tbl->name)); } else { pagetype_caption = (pagetype == page_leaf) ? "nested-leaf" : "nested-leaf-dupfix"; tbl->pages.nested_leaf += 1; if (chk->last_nested != nested) { histogram_acc(height, &tbl->histogram.nested_tree); chk->last_nested = nested; } if (height != nested->height) chk_object_issue(scope, "page", pgno, "wrong nested-tree height", "actual %i != %i dupsort-node %s", height, nested->height, chk_v2a(chk, &tbl->name)); } break; case page_sub_dupfix_leaf: case page_sub_leaf: pagetype_caption = (pagetype == page_sub_leaf) ? "subleaf-dupsort" : "subleaf-dupfix"; tbl->pages.nested_subleaf += 1; if ((tbl->flags & MDBX_DUPSORT) == 0 || nested) chk_object_issue(scope, "page", pgno, "unexpected", "type %u, table %s flags 0x%x, deep %i", (unsigned)pagetype, chk_v2a(chk, &tbl->name), tbl->flags, deep); break; } if (npages) { if (tbl->cookie) { MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra); if (npages == 1) chk_print(line, "%s-page %" PRIuSIZE, pagetype_caption, pgno); else chk_print(line, "%s-span %" PRIuSIZE "[%u]", pagetype_caption, pgno, npages); chk_line_end(chk_print(line, " of %s: header %" PRIiPTR ", %s %" PRIiPTR ", payload %" PRIiPTR ", unused %" PRIiPTR ", deep %i", chk_v2a(chk, &tbl->name), header_bytes, (pagetype == page_branch) ? "keys" : "entries", nentries, payload_bytes, unused_bytes, deep)); } bool already_used = false; for (unsigned n = 0; n < npages; ++n) { const size_t spanpgno = pgno + n; if (spanpgno >= usr->result.alloc_pages) { chk_object_issue(scope, "page", spanpgno, "wrong page-no", "%s-page: %" PRIuSIZE " > %" PRIuSIZE ", deep %i", pagetype_caption, spanpgno, usr->result.alloc_pages, deep); tbl->pages.all += 1; } else if (chk->pagemap[spanpgno]) { const MDBX_chk_table_t *const rival = chk->table[chk->pagemap[spanpgno] - 1]; chk_object_issue(scope, "page", spanpgno, (branch && rival == tbl) ? "loop" : "already used", "%s-page: by %s, deep %i", pagetype_caption, chk_v2a(chk, &rival->name), deep); already_used = true; } else { chk->pagemap[spanpgno] = (int16_t)tbl->id + 1; tbl->pages.all += 1; } } if (already_used) return branch ? MDBX_RESULT_TRUE /* avoid infinite loop/recursion */ : MDBX_SUCCESS; } if (MDBX_IS_ERROR(page_err)) { chk_object_issue(scope, "page", pgno, "invalid/corrupted", "%s-page", pagetype_caption); } else { if (unused_bytes > page_size) chk_object_issue(scope, "page", pgno, "illegal unused-bytes", "%s-page: %u < %" PRIuSIZE " < %u", pagetype_caption, 0, unused_bytes, env->ps); if (header_bytes < (int)sizeof(long) || (size_t)header_bytes >= env->ps - sizeof(long)) { chk_object_issue(scope, "page", pgno, "illegal header-length", "%s-page: %" PRIuSIZE " < %" PRIuSIZE " < %" PRIuSIZE, pagetype_caption, sizeof(long), header_bytes, env->ps - sizeof(long)); } if (nentries < 1 || (pagetype == page_branch && nentries < 2)) { chk_object_issue(scope, "page", pgno, nentries ? "half-empty" : "empty", "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE " entries, deep %i", pagetype_caption, payload_bytes, nentries, deep); tbl->pages.empty += 1; } if (npages) { if (page_bytes != page_size) { chk_object_issue(scope, "page", pgno, "misused", "%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR "h + %" PRIuPTR "p + %" PRIuPTR "u), deep %i", pagetype_caption, page_size, page_bytes, header_bytes, payload_bytes, unused_bytes, deep); if (page_size > page_bytes) tbl->lost_bytes += page_size - page_bytes; } else { tbl->payload_bytes += payload_bytes + header_bytes; usr->result.total_payload_bytes += payload_bytes + header_bytes; } } } return chk_check_break(scope); } __cold static int chk_tree(MDBX_chk_scope_t *const scope) { MDBX_chk_internal_t *const chk = scope->internal; MDBX_chk_context_t *const usr = chk->usr; MDBX_env *const env = usr->env; MDBX_txn *const txn = usr->txn; #if defined(_WIN32) || defined(_WIN64) SetLastError(ERROR_SUCCESS); #else errno = 0; #endif /* Windows */ chk->pagemap = osal_calloc(usr->result.alloc_pages, sizeof(*chk->pagemap)); if (!chk->pagemap) { int err = osal_get_errno(); return chk_error_rc(scope, err ? err : MDBX_ENOMEM, "calloc"); } if (scope->verbosity > MDBX_chk_info) chk_scope_push(scope, 0, "Walking pages..."); /* always skip key ordering checking * to avoid MDBX_CORRUPTED in case custom comparators were used */ usr->result.processed_pages = NUM_METAS; int err = walk_pages(txn, chk_pgvisitor, scope, dont_check_keys_ordering); if (MDBX_IS_ERROR(err) && err != MDBX_EINTR) chk_error_rc(scope, err, "walk_pages"); for (size_t n = NUM_METAS; n < usr->result.alloc_pages; ++n) if (!chk->pagemap[n]) usr->result.unused_pages += 1; MDBX_chk_table_t total; memset(&total, 0, sizeof(total)); total.pages.all = NUM_METAS; for (size_t i = 0; i < ARRAY_LENGTH(chk->table) && chk->table[i]; ++i) { MDBX_chk_table_t *const tbl = chk->table[i]; total.payload_bytes += tbl->payload_bytes; total.lost_bytes += tbl->lost_bytes; total.pages.all += tbl->pages.all; total.pages.empty += tbl->pages.empty; total.pages.other += tbl->pages.other; total.pages.branch += tbl->pages.branch; total.pages.leaf += tbl->pages.leaf; total.pages.nested_branch += tbl->pages.nested_branch; total.pages.nested_leaf += tbl->pages.nested_leaf; total.pages.nested_subleaf += tbl->pages.nested_subleaf; } assert(total.pages.all == usr->result.processed_pages); const size_t total_page_bytes = pgno2bytes(env, total.pages.all); if (usr->scope->subtotal_issues || usr->scope->verbosity >= MDBX_chk_verbose) chk_line_end(chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution), "walked %zu pages, left/unused %zu" ", %" PRIuSIZE " problem(s)", usr->result.processed_pages, usr->result.unused_pages, usr->scope->subtotal_issues)); err = chk_scope_restore(scope, err); if (scope->verbosity > MDBX_chk_info) { for (size_t i = 0; i < ARRAY_LENGTH(chk->table) && chk->table[i]; ++i) { MDBX_chk_table_t *const tbl = chk->table[i]; MDBX_chk_scope_t *inner = chk_scope_push(scope, 0, "tree %s:", chk_v2a(chk, &tbl->name)); if (tbl->pages.all == 0) chk_line_end( chk_print(chk_line_begin(inner, MDBX_chk_resolution), "empty")); else { MDBX_chk_line_t *line = chk_line_begin(inner, MDBX_chk_info); if (line) { line = chk_print(line, "page usage: subtotal %" PRIuSIZE, tbl->pages.all); const size_t branch_pages = tbl->pages.branch + tbl->pages.nested_branch; const size_t leaf_pages = tbl->pages.leaf + tbl->pages.nested_leaf + tbl->pages.nested_subleaf; if (tbl->pages.other) line = chk_print(line, ", other %" PRIuSIZE, tbl->pages.other); if (tbl->pages.other == 0 || (branch_pages | leaf_pages | tbl->histogram.large_pages.count) != 0) { line = chk_print(line, ", branch %" PRIuSIZE ", leaf %" PRIuSIZE, branch_pages, leaf_pages); if (tbl->histogram.large_pages.count || (tbl->flags & MDBX_DUPSORT) == 0) { line = chk_print(line, ", large %" PRIuSIZE, tbl->histogram.large_pages.count); if (tbl->histogram.large_pages.amount | tbl->histogram.large_pages.count) line = histogram_print(inner, line, &tbl->histogram.large_pages, " amount", "single", true); } } line = histogram_dist(chk_line_feed(line), &tbl->histogram.deep, "tree deep density", "1", false); if (tbl != &chk->table_gc && tbl->histogram.nested_tree.count) { line = chk_print(chk_line_feed(line), "nested tree(s) %" PRIuSIZE, tbl->histogram.nested_tree.count); line = histogram_dist(line, &tbl->histogram.nested_tree, " density", "1", false); line = chk_print(chk_line_feed(line), "nested tree(s) pages %" PRIuSIZE ": branch %" PRIuSIZE ", leaf %" PRIuSIZE ", subleaf %" PRIuSIZE, tbl->pages.nested_branch + tbl->pages.nested_leaf, tbl->pages.nested_branch, tbl->pages.nested_leaf, tbl->pages.nested_subleaf); } const size_t bytes = pgno2bytes(env, tbl->pages.all); line = chk_print( chk_line_feed(line), "page filling: subtotal %" PRIuSIZE " bytes (%.1f%%), payload %" PRIuSIZE " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)", bytes, bytes * 100.0 / total_page_bytes, tbl->payload_bytes, tbl->payload_bytes * 100.0 / bytes, bytes - tbl->payload_bytes, (bytes - tbl->payload_bytes) * 100.0 / bytes); if (tbl->pages.empty) line = chk_print(line, ", %" PRIuSIZE " empty pages", tbl->pages.empty); if (tbl->lost_bytes) line = chk_print(line, ", %" PRIuSIZE " bytes lost", tbl->lost_bytes); chk_line_end(line); } } chk_scope_restore(scope, 0); } } MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution); line = chk_print(line, "summary: total %" PRIuSIZE " bytes, payload %" PRIuSIZE " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)," " average fill %.1f%%", total_page_bytes, usr->result.total_payload_bytes, usr->result.total_payload_bytes * 100.0 / total_page_bytes, total_page_bytes - usr->result.total_payload_bytes, (total_page_bytes - usr->result.total_payload_bytes) * 100.0 / total_page_bytes, usr->result.total_payload_bytes * 100.0 / total_page_bytes); if (total.pages.empty) line = chk_print(line, ", %" PRIuSIZE " empty pages", total.pages.empty); if (total.lost_bytes) line = chk_print(line, ", %" PRIuSIZE " bytes lost", total.lost_bytes); chk_line_end(line); return err; } typedef int(chk_kv_visitor)(MDBX_chk_scope_t *const scope, MDBX_chk_table_t *tbl, const size_t record_number, const MDBX_val *key, const MDBX_val *data); __cold static int chk_handle_kv(MDBX_chk_scope_t *const scope, MDBX_chk_table_t *tbl, const size_t record_number, const MDBX_val *key, const MDBX_val *data) { MDBX_chk_internal_t *const chk = scope->internal; int err = MDBX_SUCCESS; assert(tbl->cookie); if (chk->cb->table_handle_kv) err = chk->cb->table_handle_kv(chk->usr, tbl, record_number, key, data); return err ? err : chk_check_break(scope); } __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, MDBX_chk_table_t *tbl, chk_kv_visitor *handler) { MDBX_chk_internal_t *const chk = scope->internal; MDBX_chk_context_t *const usr = chk->usr; MDBX_env *const env = usr->env; MDBX_txn *const txn = usr->txn; MDBX_cursor *cursor = nullptr; size_t record_count = 0, dups = 0, sub_databases = 0; int err; if ((MDBX_TXN_FINISHED | MDBX_TXN_ERROR) & txn->flags) { chk_line_end( chk_flush(chk_print(chk_line_begin(scope, MDBX_chk_error), "abort processing %s due to a previous error", chk_v2a(chk, &tbl->name)))); err = MDBX_BAD_TXN; goto bailout; } if (0 > (int)dbi) { err = dbi_open( txn, &tbl->name, MDBX_DB_ACCEDE, &dbi, (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr, (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr); if (unlikely(err)) { tASSERT(txn, dbi >= txn->env->n_dbi || (txn->env->dbs_flags[dbi] & DB_VALID) == 0); chk_error_rc(scope, err, "mdbx_dbi_open"); goto bailout; } tASSERT(txn, dbi < txn->env->n_dbi && (txn->env->dbs_flags[dbi] & DB_VALID) != 0); } const tree_t *const db = txn->dbs + dbi; if (handler) { const char *key_mode = nullptr; switch (tbl->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) { case 0: key_mode = "usual"; break; case MDBX_REVERSEKEY: key_mode = "reserve"; break; case MDBX_INTEGERKEY: key_mode = "ordinal"; break; case MDBX_REVERSEKEY | MDBX_INTEGERKEY: key_mode = "msgpack"; break; default: key_mode = "inconsistent"; chk_scope_issue(scope, "wrong key-mode (0x%x)", tbl->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)); } const char *value_mode = nullptr; switch (tbl->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | MDBX_INTEGERDUP)) { case 0: value_mode = "single"; break; case MDBX_DUPSORT: value_mode = "multi"; break; case MDBX_DUPSORT | MDBX_REVERSEDUP: value_mode = "multi-reverse"; break; case MDBX_DUPSORT | MDBX_DUPFIXED: value_mode = "multi-samelength"; break; case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP: value_mode = "multi-reverse-samelength"; break; case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: value_mode = "multi-ordinal"; break; case MDBX_DUPSORT | MDBX_INTEGERDUP | MDBX_REVERSEDUP: value_mode = "multi-msgpack"; break; case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: value_mode = "reserved"; break; default: value_mode = "inconsistent"; chk_scope_issue(scope, "wrong value-mode (0x%x)", tbl->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | MDBX_INTEGERDUP)); } MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info); line = chk_print(line, "key-value kind: %s-key => %s-value", key_mode, value_mode); line = chk_print(line, ", flags:"); if (!tbl->flags) line = chk_print(line, " none"); else { const uint8_t f[] = {MDBX_DUPSORT, MDBX_INTEGERKEY, MDBX_REVERSEKEY, MDBX_DUPFIXED, MDBX_REVERSEDUP, MDBX_INTEGERDUP, 0}; const char *const t[] = {"dupsort", "integerkey", "reversekey", "dupfix", "reversedup", "integerdup"}; for (size_t i = 0; f[i]; i++) if (tbl->flags & f[i]) line = chk_print(line, " %s", t[i]); } chk_line_end(chk_print(line, " (0x%02X)", tbl->flags)); line = chk_print(chk_line_begin(scope, MDBX_chk_verbose), "entries %" PRIu64 ", sequence %" PRIu64, db->items, db->sequence); if (db->mod_txnid) line = chk_print(line, ", last modification txn#%" PRIaTXN, db->mod_txnid); if (db->root != P_INVALID) line = chk_print(line, ", root #%" PRIaPGNO, db->root); chk_line_end(line); chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_verbose), "b-tree depth %u, pages: branch %" PRIaPGNO ", leaf %" PRIaPGNO ", large %" PRIaPGNO, db->height, db->branch_pages, db->leaf_pages, db->large_pages)); if ((chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) { const size_t branch_pages = tbl->pages.branch + tbl->pages.nested_branch; const size_t leaf_pages = tbl->pages.leaf + tbl->pages.nested_leaf; const size_t subtotal_pages = db->branch_pages + db->leaf_pages + db->large_pages; if (subtotal_pages != tbl->pages.all) chk_scope_issue( scope, "%s pages mismatch (%" PRIuSIZE " != walked %" PRIuSIZE ")", "subtotal", subtotal_pages, tbl->pages.all); if (db->branch_pages != branch_pages) chk_scope_issue( scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", "branch", db->branch_pages, branch_pages); if (db->leaf_pages != leaf_pages) chk_scope_issue( scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", "all-leaf", db->leaf_pages, leaf_pages); if (db->large_pages != tbl->histogram.large_pages.amount) chk_scope_issue( scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", "large/overlow", db->large_pages, tbl->histogram.large_pages.amount); } } err = mdbx_cursor_open(txn, dbi, &cursor); if (unlikely(err)) { chk_error_rc(scope, err, "mdbx_cursor_open"); goto bailout; } if (chk->flags & MDBX_CHK_IGNORE_ORDER) { cursor->checking |= z_ignord | z_pagecheck; if (cursor->subcur) cursor->subcur->cursor.checking |= z_ignord | z_pagecheck; } const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, tbl->flags); MDBX_val prev_key = {nullptr, 0}, prev_data = {nullptr, 0}; MDBX_val key, data; err = mdbx_cursor_get(cursor, &key, &data, MDBX_FIRST); while (err == MDBX_SUCCESS) { err = chk_check_break(scope); if (unlikely(err)) goto bailout; bool bad_key = false; if (key.iov_len > maxkeysize) { chk_object_issue(scope, "entry", record_count, "key length exceeds max-key-size", "%" PRIuPTR " > %" PRIuPTR, key.iov_len, maxkeysize); bad_key = true; } else if ((tbl->flags & MDBX_INTEGERKEY) && key.iov_len != 8 && key.iov_len != 4) { chk_object_issue(scope, "entry", record_count, "wrong key length", "%" PRIuPTR " != 4or8", key.iov_len); bad_key = true; } bool bad_data = false; if ((tbl->flags & MDBX_INTEGERDUP) && data.iov_len != 8 && data.iov_len != 4) { chk_object_issue(scope, "entry", record_count, "wrong data length", "%" PRIuPTR " != 4or8", data.iov_len); bad_data = true; } if (prev_key.iov_base) { if (prev_data.iov_base && !bad_data && (tbl->flags & MDBX_DUPFIXED) && prev_data.iov_len != data.iov_len) { chk_object_issue(scope, "entry", record_count, "different data length", "%" PRIuPTR " != %" PRIuPTR, prev_data.iov_len, data.iov_len); bad_data = true; } if (!bad_key) { int cmp = mdbx_cmp(txn, dbi, &key, &prev_key); if (cmp == 0) { ++dups; if ((tbl->flags & MDBX_DUPSORT) == 0) { chk_object_issue(scope, "entry", record_count, "duplicated entries", nullptr); if (prev_data.iov_base && data.iov_len == prev_data.iov_len && memcmp(data.iov_base, prev_data.iov_base, data.iov_len) == 0) chk_object_issue(scope, "entry", record_count, "complete duplicate", nullptr); } else if (!bad_data && prev_data.iov_base) { cmp = mdbx_dcmp(txn, dbi, &data, &prev_data); if (cmp == 0) chk_object_issue(scope, "entry", record_count, "complete duplicate", nullptr); else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER)) chk_object_issue(scope, "entry", record_count, "wrong order of multi-values", nullptr); } } else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER)) chk_object_issue(scope, "entry", record_count, "wrong order of entries", nullptr); } } if (!bad_key) { if (!prev_key.iov_base && (tbl->flags & MDBX_INTEGERKEY)) chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "fixed key-size %" PRIuSIZE, key.iov_len)); prev_key = key; } if (!bad_data) { if (!prev_data.iov_base && (tbl->flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED))) chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "fixed data-size %" PRIuSIZE, data.iov_len)); prev_data = data; } record_count++; histogram_acc(key.iov_len, &tbl->histogram.key_len); histogram_acc(data.iov_len, &tbl->histogram.val_len); const node_t *const node = page_node(cursor->pg[cursor->top], cursor->ki[cursor->top]); if (node_flags(node) == N_TREE) { if (dbi != MAIN_DBI || (tbl->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP))) chk_object_issue(scope, "entry", record_count, "unexpected table", "node-flags 0x%x", node_flags(node)); else if (data.iov_len != sizeof(tree_t)) chk_object_issue(scope, "entry", record_count, "wrong table node size", "node-size %" PRIuSIZE " != %" PRIuSIZE, data.iov_len, sizeof(tree_t)); else if (scope->stage == MDBX_chk_maindb) /* подсчитываем table при первом проходе */ sub_databases += 1; else { /* обработка table при втором проходе */ tree_t aligned_db; memcpy(&aligned_db, data.iov_base, sizeof(aligned_db)); walk_tbl_t tbl_info = {.name = key}; tbl_info.internal = &aligned_db; MDBX_chk_table_t *table; err = chk_get_tbl(scope, &tbl_info, &table); if (unlikely(err)) goto bailout; if (table->cookie) { err = chk_scope_begin( chk, 0, MDBX_chk_tables, table, &usr->result.problems_kv, "Processing table %s...", chk_v2a(chk, &table->name)); if (likely(!err)) { err = chk_db(usr->scope, (MDBX_dbi)-1, table, chk_handle_kv); if (err != MDBX_EINTR && err != MDBX_RESULT_TRUE) usr->result.table_processed += 1; } err = chk_scope_restore(scope, err); if (unlikely(err)) goto bailout; } else chk_line_end(chk_flush( chk_print(chk_line_begin(scope, MDBX_chk_processing), "Skip processing %s...", chk_v2a(chk, &table->name)))); } } else if (handler) { err = handler(scope, tbl, record_count, &key, &data); if (unlikely(err)) goto bailout; } err = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT); } err = (err != MDBX_NOTFOUND) ? chk_error_rc(scope, err, "mdbx_cursor_get") : MDBX_SUCCESS; if (err == MDBX_SUCCESS && record_count != db->items) chk_scope_issue(scope, "different number of entries %" PRIuSIZE " != %" PRIu64, record_count, db->items); bailout: if (cursor) { if (handler) { if (tbl->histogram.key_len.count) { MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info); line = histogram_dist(line, &tbl->histogram.key_len, "key length density", "0/1", false); chk_line_feed(line); line = histogram_dist(line, &tbl->histogram.val_len, "value length density", "0/1", false); chk_line_end(line); } if (scope->stage == MDBX_chk_maindb) usr->result.table_total = sub_databases; if (chk->cb->table_conclude) err = chk->cb->table_conclude(usr, tbl, cursor, err); MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution); line = chk_print(line, "summary: %" PRIuSIZE " records,", record_count); if (dups || (tbl->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP))) line = chk_print(line, " %" PRIuSIZE " dups,", dups); if (sub_databases || dbi == MAIN_DBI) line = chk_print(line, " %" PRIuSIZE " tables,", sub_databases); line = chk_print(line, " %" PRIuSIZE " key's bytes," " %" PRIuSIZE " data's bytes," " %" PRIuSIZE " problem(s)", tbl->histogram.key_len.amount, tbl->histogram.val_len.amount, scope->subtotal_issues); chk_line_end(chk_flush(line)); } mdbx_cursor_close(cursor); if (!txn->cursors[dbi] && (txn->dbi_state[dbi] & DBI_FRESH)) mdbx_dbi_close(env, dbi); } return err; } __cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, MDBX_chk_table_t *tbl, const size_t record_number, const MDBX_val *key, const MDBX_val *data) { MDBX_chk_internal_t *const chk = scope->internal; MDBX_chk_context_t *const usr = chk->usr; assert(tbl == &chk->table_gc); (void)tbl; const char *bad = ""; pgno_t *iptr = data->iov_base; if (key->iov_len != sizeof(txnid_t)) chk_object_issue(scope, "entry", record_number, "wrong txn-id size", "key-size %" PRIuSIZE, key->iov_len); else { txnid_t txnid; memcpy(&txnid, key->iov_base, sizeof(txnid)); if (txnid < 1 || txnid > usr->txn->txnid) chk_object_issue(scope, "entry", record_number, "wrong txn-id", "%" PRIaTXN, txnid); else { if (data->iov_len < sizeof(pgno_t) || data->iov_len % sizeof(pgno_t)) chk_object_issue(scope, "entry", txnid, "wrong idl size", "%" PRIuPTR, data->iov_len); size_t number = (data->iov_len >= sizeof(pgno_t)) ? *iptr++ : 0; if (number > PAGELIST_LIMIT) chk_object_issue(scope, "entry", txnid, "wrong idl length", "%" PRIuPTR, number); else if ((number + 1) * sizeof(pgno_t) > data->iov_len) { chk_object_issue(scope, "entry", txnid, "trimmed idl", "%" PRIuSIZE " > %" PRIuSIZE " (corruption)", (number + 1) * sizeof(pgno_t), data->iov_len); number = data->iov_len / sizeof(pgno_t) - 1; } else if (data->iov_len - (number + 1) * sizeof(pgno_t) >= /* LY: allow gap up to one page. it is ok * and better than shink-and-retry inside gc_update() */ usr->env->ps) chk_object_issue(scope, "entry", txnid, "extra idl space", "%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)", (number + 1) * sizeof(pgno_t), data->iov_len); usr->result.gc_pages += number; if (chk->envinfo.mi_latter_reader_txnid > txnid) usr->result.reclaimable_pages += number; size_t prev = MDBX_PNL_ASCENDING ? NUM_METAS - 1 : usr->txn->geo.first_unallocated; size_t span = 1; for (size_t i = 0; i < number; ++i) { const size_t pgno = iptr[i]; if (pgno < NUM_METAS) chk_object_issue(scope, "entry", txnid, "wrong idl entry", "pgno %" PRIuSIZE " < meta-pages %u", pgno, NUM_METAS); else if (pgno >= usr->result.backed_pages) chk_object_issue(scope, "entry", txnid, "wrong idl entry", "pgno %" PRIuSIZE " > backed-pages %" PRIuSIZE, pgno, usr->result.backed_pages); else if (pgno >= usr->result.alloc_pages) chk_object_issue(scope, "entry", txnid, "wrong idl entry", "pgno %" PRIuSIZE " > alloc-pages %" PRIuSIZE, pgno, usr->result.alloc_pages - 1); else { if (MDBX_PNL_DISORDERED(prev, pgno)) { bad = " [bad sequence]"; chk_object_issue( scope, "entry", txnid, "bad sequence", "%" PRIuSIZE " %c [%" PRIuSIZE "].%" PRIuSIZE, prev, (prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? '>' : '<'), i, pgno); } if (chk->pagemap) { const intptr_t id = chk->pagemap[pgno]; if (id == 0) chk->pagemap[pgno] = -1 /* mark the pgno listed in GC */; else if (id > 0) { assert(id - 1 <= (intptr_t)ARRAY_LENGTH(chk->table)); chk_object_issue(scope, "page", pgno, "already used", "by %s", chk_v2a(chk, &chk->table[id - 1]->name)); } else chk_object_issue(scope, "page", pgno, "already listed in GC", nullptr); } } prev = pgno; while (i + span < number && iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) : pgno_sub(pgno, span))) ++span; } if (tbl->cookie) { chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_details), "transaction %" PRIaTXN ", %" PRIuSIZE " pages, maxspan %" PRIuSIZE "%s", txnid, number, span, bad)); for (size_t i = 0; i < number; i += span) { const size_t pgno = iptr[i]; for (span = 1; i + span < number && iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) : pgno_sub(pgno, span)); ++span) ; histogram_acc(span, &tbl->histogram.nested_tree); MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra); if (line) { if (span > 1) line = chk_print(line, "%9" PRIuSIZE "[%" PRIuSIZE "]", pgno, span); else line = chk_print(line, "%9" PRIuSIZE, pgno); chk_line_end(line); int err = chk_check_break(scope); if (err) return err; } } } } } return chk_check_break(scope); } __cold static int env_chk(MDBX_chk_scope_t *const scope) { MDBX_chk_internal_t *const chk = scope->internal; MDBX_chk_context_t *const usr = chk->usr; MDBX_env *const env = usr->env; MDBX_txn *const txn = usr->txn; int err = env_info(env, txn, &chk->envinfo, sizeof(chk->envinfo), &chk->troika); if (unlikely(err)) return chk_error_rc(scope, err, "env_info"); MDBX_chk_line_t *line = chk_puts(chk_line_begin(scope, MDBX_chk_info - (1 << MDBX_chk_severity_prio_shift)), "dxb-id "); if (chk->envinfo.mi_dxbid.x | chk->envinfo.mi_dxbid.y) line = chk_print(line, "%016" PRIx64 "-%016" PRIx64, chk->envinfo.mi_dxbid.x, chk->envinfo.mi_dxbid.y); else line = chk_puts(line, "is absent"); chk_line_end(line); line = chk_puts(chk_line_begin(scope, MDBX_chk_info), "current boot-id "); if (chk->envinfo.mi_bootid.current.x | chk->envinfo.mi_bootid.current.y) line = chk_print(line, "%016" PRIx64 "-%016" PRIx64, chk->envinfo.mi_bootid.current.x, chk->envinfo.mi_bootid.current.y); else line = chk_puts(line, "is unavailable"); chk_line_end(line); err = osal_filesize(env->lazy_fd, &env->dxb_mmap.filesize); if (unlikely(err)) return chk_error_rc(scope, err, "osal_filesize"); //-------------------------------------------------------------------------- err = chk_scope_begin(chk, 1, MDBX_chk_meta, nullptr, &usr->result.problems_meta, "Peek the meta-pages..."); if (likely(!err)) { MDBX_chk_scope_t *const inner = usr->scope; const uint64_t dxbfile_pages = env->dxb_mmap.filesize >> env->ps2ln; usr->result.alloc_pages = txn->geo.first_unallocated; usr->result.backed_pages = bytes2pgno(env, env->dxb_mmap.current); if (unlikely(usr->result.backed_pages > dxbfile_pages)) chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, usr->result.backed_pages, dxbfile_pages); if (unlikely(dxbfile_pages < NUM_METAS)) chk_scope_issue(inner, "file-pages %" PRIu64 " < %u", dxbfile_pages, NUM_METAS); if (unlikely(usr->result.backed_pages < NUM_METAS)) chk_scope_issue(inner, "backed-pages %zu < %u", usr->result.backed_pages, NUM_METAS); if (unlikely(usr->result.backed_pages < NUM_METAS)) { chk_scope_issue(inner, "backed-pages %zu < num-metas %u", usr->result.backed_pages, NUM_METAS); return MDBX_CORRUPTED; } if (unlikely(dxbfile_pages < NUM_METAS)) { chk_scope_issue(inner, "backed-pages %zu < num-metas %u", usr->result.backed_pages, NUM_METAS); return MDBX_CORRUPTED; } if (unlikely(usr->result.backed_pages > (size_t)MAX_PAGENO + 1)) { chk_scope_issue(inner, "backed-pages %zu > max-pages %zu", usr->result.backed_pages, (size_t)MAX_PAGENO + 1); usr->result.backed_pages = MAX_PAGENO + 1; } if ((env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) { if (unlikely(usr->result.backed_pages > dxbfile_pages)) { chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, usr->result.backed_pages, dxbfile_pages); usr->result.backed_pages = (size_t)dxbfile_pages; } if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) { chk_scope_issue(scope, "alloc-pages %zu > backed-pages %zu", usr->result.alloc_pages, usr->result.backed_pages); usr->result.alloc_pages = usr->result.backed_pages; } } else { /* DB may be shrunk by writer down to the allocated (but unused) pages. */ if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) { chk_scope_issue(inner, "alloc-pages %zu > backed-pages %zu", usr->result.alloc_pages, usr->result.backed_pages); usr->result.alloc_pages = usr->result.backed_pages; } if (unlikely(usr->result.alloc_pages > dxbfile_pages)) { chk_scope_issue(inner, "alloc-pages %zu > file-pages %" PRIu64, usr->result.alloc_pages, dxbfile_pages); usr->result.alloc_pages = (size_t)dxbfile_pages; } if (unlikely(usr->result.backed_pages > dxbfile_pages)) usr->result.backed_pages = (size_t)dxbfile_pages; } line = chk_line_feed(chk_print( chk_line_begin(inner, MDBX_chk_info), "pagesize %u (%u system), max keysize %u..%u" ", max readers %u", env->ps, globals.sys_pagesize, mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT), mdbx_env_get_maxkeysize_ex(env, MDBX_DB_DEFAULTS), env->max_readers)); line = chk_line_feed( chk_print_size(line, "mapsize ", env->dxb_mmap.current, nullptr)); if (txn->geo.lower == txn->geo.upper) line = chk_print_size( line, "fixed datafile: ", chk->envinfo.mi_geo.current, nullptr); else { line = chk_print_size( line, "dynamic datafile: ", chk->envinfo.mi_geo.lower, nullptr); line = chk_print_size(line, " .. ", chk->envinfo.mi_geo.upper, ", "); line = chk_print_size(line, "+", chk->envinfo.mi_geo.grow, ", "); line = chk_line_feed( chk_print_size(line, "-", chk->envinfo.mi_geo.shrink, nullptr)); line = chk_print_size( line, "current datafile: ", chk->envinfo.mi_geo.current, nullptr); } tASSERT(txn, txn->geo.now == chk->envinfo.mi_geo.current / chk->envinfo.mi_dxb_pagesize); chk_line_end(chk_print(line, ", %u pages", txn->geo.now)); #if defined(_WIN32) || defined(_WIN64) || MDBX_DEBUG if (txn->geo.shrink_pv && txn->geo.now != txn->geo.upper && scope->verbosity >= MDBX_chk_verbose) { line = chk_line_begin(inner, MDBX_chk_notice); chk_line_feed(chk_print( line, " > WARNING: Due Windows system limitations a file couldn't")); chk_line_feed(chk_print( line, " > be truncated while the database is opened. So, the size")); chk_line_feed(chk_print( line, " > database file of may by large than the database itself,")); chk_line_end(chk_print( line, " > until it will be closed or reopened in read-write mode.")); } #endif /* Windows || Debug */ chk_verbose_meta(inner, 0); chk_verbose_meta(inner, 1); chk_verbose_meta(inner, 2); if (env->stuck_meta >= 0) { chk_line_end(chk_print(chk_line_begin(inner, MDBX_chk_processing), "skip checking meta-pages since the %u" " is selected for verification", env->stuck_meta)); line = chk_line_feed( chk_print(chk_line_begin(inner, MDBX_chk_resolution), "transactions: recent %" PRIu64 ", " "selected for verification %" PRIu64 ", lag %" PRIi64, chk->envinfo.mi_recent_txnid, chk->envinfo.mi_meta_txnid[env->stuck_meta], chk->envinfo.mi_recent_txnid - chk->envinfo.mi_meta_txnid[env->stuck_meta])); chk_line_end(line); } else { chk_line_end(chk_puts(chk_line_begin(inner, MDBX_chk_verbose), "performs check for meta-pages clashes")); const unsigned meta_clash_mask = meta_eq_mask(&chk->troika); if (meta_clash_mask & 1) chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 0, 1); if (meta_clash_mask & 2) chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 1, 2); if (meta_clash_mask & 4) chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 2, 0); const unsigned prefer_steady_metanum = chk->troika.prefer_steady; const uint64_t prefer_steady_txnid = chk->troika.txnid[prefer_steady_metanum]; const unsigned recent_metanum = chk->troika.recent; const uint64_t recent_txnid = chk->troika.txnid[recent_metanum]; if (env->flags & MDBX_EXCLUSIVE) { chk_line_end( chk_puts(chk_line_begin(inner, MDBX_chk_verbose), "performs full check recent-txn-id with meta-pages")); eASSERT(env, recent_txnid == chk->envinfo.mi_recent_txnid); if (prefer_steady_txnid != recent_txnid) { if ((chk->flags & MDBX_CHK_READWRITE) != 0 && (env->flags & MDBX_RDONLY) == 0 && recent_txnid > prefer_steady_txnid && (chk->envinfo.mi_bootid.current.x | chk->envinfo.mi_bootid.current.y) != 0 && chk->envinfo.mi_bootid.current.x == chk->envinfo.mi_bootid.meta[recent_metanum].x && chk->envinfo.mi_bootid.current.y == chk->envinfo.mi_bootid.meta[recent_metanum].y) { chk_line_end( chk_print(chk_line_begin(inner, MDBX_chk_verbose), "recent meta-%u is weak, but boot-id match current" " (will synced upon successful check)", recent_metanum)); } else chk_scope_issue( inner, "steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64 " != %" PRIi64 ")", prefer_steady_metanum, prefer_steady_txnid, recent_txnid); } } else if (chk->write_locked) { chk_line_end( chk_puts(chk_line_begin(inner, MDBX_chk_verbose), "performs lite check recent-txn-id with meta-pages (not a " "monopolistic mode)")); if (recent_txnid != chk->envinfo.mi_recent_txnid) { chk_scope_issue(inner, "weak meta-%d txn-id mismatch recent-txn-id (%" PRIi64 " != %" PRIi64 ")", recent_metanum, recent_txnid, chk->envinfo.mi_recent_txnid); } } else { chk_line_end(chk_puts( chk_line_begin(inner, MDBX_chk_verbose), "skip check recent-txn-id with meta-pages (monopolistic or " "read-write mode only)")); } chk_line_end(chk_print( chk_line_begin(inner, MDBX_chk_resolution), "transactions: recent %" PRIu64 ", latter reader %" PRIu64 ", lag %" PRIi64, chk->envinfo.mi_recent_txnid, chk->envinfo.mi_latter_reader_txnid, chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid)); } } err = chk_scope_restore(scope, err); //-------------------------------------------------------------------------- const char *const subj_tree = "B-Trees"; if (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), "Skipping %s traversal...", subj_tree)); else { err = chk_scope_begin( chk, -1, MDBX_chk_tree, nullptr, &usr->result.tree_problems, "Traversal %s by txn#%" PRIaTXN "...", subj_tree, txn->txnid); if (likely(!err)) err = chk_tree(usr->scope); if (usr->result.tree_problems && usr->result.gc_tree_problems == 0) usr->result.gc_tree_problems = usr->result.tree_problems; if (usr->result.tree_problems && usr->result.kv_tree_problems == 0) usr->result.kv_tree_problems = usr->result.tree_problems; chk_scope_restore(scope, err); } const char *const subj_gc = chk_v2a(chk, MDBX_CHK_GC); if (usr->result.gc_tree_problems > 0) chk_line_end(chk_print( chk_line_begin(scope, MDBX_chk_processing), "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", subj_gc, subj_tree, usr->result.problems_gc = usr->result.gc_tree_problems)); else { err = chk_scope_begin( chk, -1, MDBX_chk_gc, &chk->table_gc, &usr->result.problems_gc, "Processing %s by txn#%" PRIaTXN "...", subj_gc, txn->txnid); if (likely(!err)) err = chk_db(usr->scope, FREE_DBI, &chk->table_gc, chk_handle_gc); line = chk_line_begin(scope, MDBX_chk_info); if (line) { histogram_print(scope, line, &chk->table_gc.histogram.nested_tree, "span(s)", "single", false); chk_line_end(line); } if (usr->result.problems_gc == 0 && (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) { const size_t used_pages = usr->result.alloc_pages - usr->result.gc_pages; if (usr->result.processed_pages != used_pages) chk_scope_issue(usr->scope, "used pages mismatch (%" PRIuSIZE "(walked) != %" PRIuSIZE "(allocated - GC))", usr->result.processed_pages, used_pages); if (usr->result.unused_pages != usr->result.gc_pages) chk_scope_issue(usr->scope, "GC pages mismatch (%" PRIuSIZE "(expected) != %" PRIuSIZE "(GC))", usr->result.unused_pages, usr->result.gc_pages); } } chk_scope_restore(scope, err); //-------------------------------------------------------------------------- err = chk_scope_begin(chk, 1, MDBX_chk_space, nullptr, nullptr, "Page allocation:"); const double percent_boundary_reciprocal = 100.0 / txn->geo.upper; const double percent_backed_reciprocal = 100.0 / usr->result.backed_pages; const size_t detained = usr->result.gc_pages - usr->result.reclaimable_pages; const size_t available2boundary = txn->geo.upper - usr->result.alloc_pages + usr->result.reclaimable_pages; const size_t available2backed = usr->result.backed_pages - usr->result.alloc_pages + usr->result.reclaimable_pages; const size_t remained2boundary = txn->geo.upper - usr->result.alloc_pages; const size_t remained2backed = usr->result.backed_pages - usr->result.alloc_pages; const size_t used = (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) ? usr->result.alloc_pages - usr->result.gc_pages : usr->result.processed_pages; line = chk_line_begin(usr->scope, MDBX_chk_info); line = chk_print(line, "backed by file: %" PRIuSIZE " pages (%.1f%%)" ", %" PRIuSIZE " left to boundary (%.1f%%)", usr->result.backed_pages, usr->result.backed_pages * percent_boundary_reciprocal, txn->geo.upper - usr->result.backed_pages, (txn->geo.upper - usr->result.backed_pages) * percent_boundary_reciprocal); line = chk_line_feed(line); line = chk_print( line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", "used", used, used * percent_backed_reciprocal, used * percent_boundary_reciprocal); line = chk_line_feed(line); line = chk_print( line, "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE " to boundary (%.1f%% of boundary)", "remained", remained2backed, remained2backed * percent_backed_reciprocal, remained2boundary, remained2boundary * percent_boundary_reciprocal); line = chk_line_feed(line); line = chk_print( line, "reclaimable: %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)" ", GC %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)", usr->result.reclaimable_pages, usr->result.reclaimable_pages * percent_backed_reciprocal, usr->result.reclaimable_pages * percent_boundary_reciprocal, usr->result.gc_pages, usr->result.gc_pages * percent_backed_reciprocal, usr->result.gc_pages * percent_boundary_reciprocal); line = chk_line_feed(line); line = chk_print( line, "detained by reader(s): %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)" ", %u reader(s), lag %" PRIi64, detained, detained * percent_backed_reciprocal, detained * percent_boundary_reciprocal, chk->envinfo.mi_numreaders, chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid); line = chk_line_feed(line); line = chk_print( line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", "allocated", usr->result.alloc_pages, usr->result.alloc_pages * percent_backed_reciprocal, usr->result.alloc_pages * percent_boundary_reciprocal); line = chk_line_feed(line); line = chk_print(line, "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE " to boundary (%.1f%% of boundary)", "available", available2backed, available2backed * percent_backed_reciprocal, available2boundary, available2boundary * percent_boundary_reciprocal); chk_line_end(line); line = chk_line_begin(usr->scope, MDBX_chk_resolution); line = chk_print(line, "%s %" PRIaPGNO " pages", (txn->geo.upper == txn->geo.now) ? "total" : "upto", txn->geo.upper); line = chk_print(line, ", backed %" PRIuSIZE " (%.1f%%)", usr->result.backed_pages, usr->result.backed_pages * percent_boundary_reciprocal); line = chk_print(line, ", allocated %" PRIuSIZE " (%.1f%%)", usr->result.alloc_pages, usr->result.alloc_pages * percent_boundary_reciprocal); line = chk_print(line, ", available %" PRIuSIZE " (%.1f%%)", available2boundary, available2boundary * percent_boundary_reciprocal); chk_line_end(line); chk_scope_restore(scope, err); //-------------------------------------------------------------------------- const char *const subj_main = chk_v2a(chk, MDBX_CHK_MAIN); if (chk->flags & MDBX_CHK_SKIP_KV_TRAVERSAL) chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), "Skip processing %s...", subj_main)); else if ((usr->result.problems_kv = usr->result.kv_tree_problems) > 0) chk_line_end(chk_print( chk_line_begin(scope, MDBX_chk_processing), "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", subj_main, subj_tree, usr->result.problems_kv = usr->result.kv_tree_problems)); else { err = chk_scope_begin(chk, 0, MDBX_chk_maindb, &chk->table_main, &usr->result.problems_kv, "Processing %s...", subj_main); if (likely(!err)) err = chk_db(usr->scope, MAIN_DBI, &chk->table_main, chk_handle_kv); chk_scope_restore(scope, err); const char *const subj_tables = "table(s)"; if (usr->result.problems_kv && usr->result.table_total) chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), "Skip processing %s", subj_tables)); else if (usr->result.problems_kv == 0 && usr->result.table_total == 0) chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "No %s", subj_tables)); else if (usr->result.problems_kv == 0 && usr->result.table_total) { err = chk_scope_begin( chk, 1, MDBX_chk_tables, nullptr, &usr->result.problems_kv, "Processing %s by txn#%" PRIaTXN "...", subj_tables, txn->txnid); if (!err) err = chk_db(usr->scope, MAIN_DBI, &chk->table_main, nullptr); if (usr->scope->subtotal_issues) chk_line_end(chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution), "processed %" PRIuSIZE " of %" PRIuSIZE " %s, %" PRIuSIZE " problems(s)", usr->result.table_processed, usr->result.table_total, subj_tables, usr->scope->subtotal_issues)); } chk_scope_restore(scope, err); } return chk_scope_end(chk, chk_scope_begin(chk, 0, MDBX_chk_conclude, nullptr, nullptr, nullptr)); } __cold int mdbx_env_chk_encount_problem(MDBX_chk_context_t *ctx) { if (likely(ctx && ctx->internal && ctx->internal->usr == ctx && ctx->internal->problem_counter && ctx->scope)) { *ctx->internal->problem_counter += 1; ctx->scope->subtotal_issues += 1; return MDBX_SUCCESS; } return MDBX_EINVAL; } __cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks *cb, MDBX_chk_context_t *ctx, const MDBX_chk_flags_t flags, MDBX_chk_severity_t verbosity, unsigned timeout_seconds_16dot16) { int err, rc = check_env(env, false); if (unlikely(rc != MDBX_SUCCESS)) return rc; if (unlikely(!cb || !ctx || ctx->internal)) return MDBX_EINVAL; MDBX_chk_internal_t *const chk = osal_calloc(1, sizeof(MDBX_chk_internal_t)); if (unlikely(!chk)) return MDBX_ENOMEM; chk->cb = cb; chk->usr = ctx; chk->usr->internal = chk; chk->usr->env = env; chk->flags = flags; chk->table_gc.id = -1; chk->table_gc.name.iov_base = MDBX_CHK_GC; chk->table[FREE_DBI] = &chk->table_gc; chk->table_main.id = -1; chk->table_main.name.iov_base = MDBX_CHK_MAIN; chk->table[MAIN_DBI] = &chk->table_main; chk->monotime_timeout = timeout_seconds_16dot16 ? osal_16dot16_to_monotime(timeout_seconds_16dot16) + osal_monotime() : 0; chk->usr->scope_nesting = 0; chk->usr->result.tables = (const void *)&chk->table; MDBX_chk_scope_t *const top = chk->scope_stack; top->verbosity = verbosity; top->internal = chk; // init rc = chk_scope_end( chk, chk_scope_begin(chk, 0, MDBX_chk_init, nullptr, nullptr, nullptr)); // lock if (likely(!rc)) rc = chk_scope_begin( chk, 0, MDBX_chk_lock, nullptr, nullptr, "Taking %slock...", (env->flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) ? "" : "read "); if (likely(!rc) && (env->flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0 && (flags & MDBX_CHK_READWRITE)) { rc = mdbx_txn_lock(env, false); if (unlikely(rc)) chk_error_rc(ctx->scope, rc, "mdbx_txn_lock"); else chk->write_locked = true; } if (likely(!rc)) { rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &ctx->txn); if (unlikely(rc)) chk_error_rc(ctx->scope, rc, "mdbx_txn_begin"); } chk_scope_end(chk, rc); // doit if (likely(!rc)) { chk->table_gc.flags = ctx->txn->dbs[FREE_DBI].flags; chk->table_main.flags = ctx->txn->dbs[MAIN_DBI].flags; rc = env_chk(top); } // unlock if (ctx->txn || chk->write_locked) { chk_scope_begin(chk, 0, MDBX_chk_unlock, nullptr, nullptr, nullptr); if (ctx->txn) { err = mdbx_txn_abort(ctx->txn); if (err && !rc) rc = err; ctx->txn = nullptr; } if (chk->write_locked) mdbx_txn_unlock(env); rc = chk_scope_end(chk, rc); } // finalize err = chk_scope_begin(chk, 0, MDBX_chk_finalize, nullptr, nullptr, nullptr); rc = chk_scope_end(chk, err ? err : rc); chk_dispose(chk); return rc; }