libmdbx/src/chk.c

2108 lines
82 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/// \copyright SPDX-License-Identifier: Apache-2.0
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2024
#include "internals.h"
typedef struct MDBX_chk_internal {
MDBX_chk_context_t *usr;
const struct MDBX_chk_callbacks *cb;
uint64_t monotime_timeout;
size_t *problem_counter;
uint8_t flags;
bool got_break;
bool write_locked;
uint8_t scope_depth;
MDBX_chk_table_t table_gc, table_main;
int16_t *pagemap;
MDBX_chk_table_t *last_lookup;
const void *last_nested;
MDBX_chk_scope_t scope_stack[12];
MDBX_chk_table_t *table[MDBX_MAX_DBI + CORE_DBS];
MDBX_envinfo envinfo;
troika_t troika;
MDBX_val v2a_buf;
} MDBX_chk_internal_t;
__cold static int chk_check_break(MDBX_chk_scope_t *const scope) {
MDBX_chk_internal_t *const chk = scope->internal;
return (chk->got_break || (chk->cb->check_break &&
(chk->got_break = chk->cb->check_break(chk->usr))))
? MDBX_RESULT_TRUE
: MDBX_RESULT_FALSE;
}
__cold static void chk_line_end(MDBX_chk_line_t *line) {
if (likely(line)) {
MDBX_chk_internal_t *chk = line->ctx->internal;
assert(line->begin <= line->end && line->begin <= line->out &&
line->out <= line->end);
if (likely(chk->cb->print_done))
chk->cb->print_done(line);
}
}
__cold __must_check_result static MDBX_chk_line_t *
chk_line_begin(MDBX_chk_scope_t *const scope, enum MDBX_chk_severity severity) {
MDBX_chk_internal_t *const chk = scope->internal;
if (severity < MDBX_chk_warning)
mdbx_env_chk_encount_problem(chk->usr);
MDBX_chk_line_t *line = nullptr;
if (likely(chk->cb->print_begin)) {
line = chk->cb->print_begin(chk->usr, severity);
if (likely(line)) {
assert(line->ctx == nullptr || (line->ctx == chk->usr && line->empty));
assert(line->begin <= line->end && line->begin <= line->out &&
line->out <= line->end);
line->ctx = chk->usr;
}
}
return line;
}
__cold static MDBX_chk_line_t *chk_line_feed(MDBX_chk_line_t *line) {
if (likely(line)) {
MDBX_chk_internal_t *chk = line->ctx->internal;
enum MDBX_chk_severity severity = line->severity;
chk_line_end(line);
line = chk_line_begin(chk->usr->scope, severity);
}
return line;
}
__cold static MDBX_chk_line_t *chk_flush(MDBX_chk_line_t *line) {
if (likely(line)) {
MDBX_chk_internal_t *chk = line->ctx->internal;
assert(line->begin <= line->end && line->begin <= line->out &&
line->out <= line->end);
if (likely(chk->cb->print_flush)) {
chk->cb->print_flush(line);
assert(line->begin <= line->end && line->begin <= line->out &&
line->out <= line->end);
line->out = line->begin;
}
}
return line;
}
__cold static size_t chk_print_wanna(MDBX_chk_line_t *line, size_t need) {
if (likely(line && need)) {
size_t have = line->end - line->out;
assert(line->begin <= line->end && line->begin <= line->out &&
line->out <= line->end);
if (need > have) {
line = chk_flush(line);
have = line->end - line->out;
}
return (need < have) ? need : have;
}
return 0;
}
__cold static MDBX_chk_line_t *chk_puts(MDBX_chk_line_t *line,
const char *str) {
if (likely(line && str && *str)) {
MDBX_chk_internal_t *chk = line->ctx->internal;
size_t left = strlen(str);
assert(line->begin <= line->end && line->begin <= line->out &&
line->out <= line->end);
if (chk->cb->print_chars) {
chk->cb->print_chars(line, str, left);
assert(line->begin <= line->end && line->begin <= line->out &&
line->out <= line->end);
} else
do {
size_t chunk = chk_print_wanna(line, left);
assert(chunk <= left);
if (unlikely(!chunk))
break;
memcpy(line->out, str, chunk);
line->out += chunk;
assert(line->begin <= line->end && line->begin <= line->out &&
line->out <= line->end);
str += chunk;
left -= chunk;
} while (left);
line->empty = false;
}
return line;
}
__cold static MDBX_chk_line_t *chk_print_va(MDBX_chk_line_t *line,
const char *fmt, va_list args) {
if (likely(line)) {
MDBX_chk_internal_t *chk = line->ctx->internal;
assert(line->begin <= line->end && line->begin <= line->out &&
line->out <= line->end);
if (chk->cb->print_format) {
chk->cb->print_format(line, fmt, args);
assert(line->begin <= line->end && line->begin <= line->out &&
line->out <= line->end);
} else {
va_list ones;
va_copy(ones, args);
const int needed = vsnprintf(nullptr, 0, fmt, ones);
va_end(ones);
if (likely(needed > 0)) {
const size_t have = chk_print_wanna(line, needed);
if (likely(have > 0)) {
int written = vsnprintf(line->out, have, fmt, args);
if (likely(written > 0))
line->out += written;
assert(line->begin <= line->end && line->begin <= line->out &&
line->out <= line->end);
}
}
}
line->empty = false;
}
return line;
}
__cold static MDBX_chk_line_t *MDBX_PRINTF_ARGS(2, 3)
chk_print(MDBX_chk_line_t *line, const char *fmt, ...) {
if (likely(line)) {
// MDBX_chk_internal_t *chk = line->ctx->internal;
va_list args;
va_start(args, fmt);
line = chk_print_va(line, fmt, args);
va_end(args);
line->empty = false;
}
return line;
}
__cold static MDBX_chk_line_t *chk_print_size(MDBX_chk_line_t *line,
const char *prefix,
const uint64_t value,
const char *suffix) {
static const char sf[] =
"KMGTPEZY"; /* LY: Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta! */
if (likely(line)) {
MDBX_chk_internal_t *chk = line->ctx->internal;
prefix = prefix ? prefix : "";
suffix = suffix ? suffix : "";
if (chk->cb->print_size)
chk->cb->print_size(line, prefix, value, suffix);
else
for (unsigned i = 0;; ++i) {
const unsigned scale = 10 + i * 10;
const uint64_t rounded = value + (UINT64_C(5) << (scale - 10));
const uint64_t integer = rounded >> scale;
const uint64_t fractional =
(rounded - (integer << scale)) * 100u >> scale;
if ((rounded >> scale) <= 1000)
return chk_print(line, "%s%" PRIu64 " (%u.%02u %ciB)%s", prefix,
value, (unsigned)integer, (unsigned)fractional,
sf[i], suffix);
}
line->empty = false;
}
return line;
}
__cold static int chk_error_rc(MDBX_chk_scope_t *const scope, int err,
const char *subj) {
MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error);
if (line)
chk_line_end(chk_flush(chk_print(line, "%s() failed, error %s (%d)", subj,
mdbx_strerror(err), err)));
else
debug_log(MDBX_LOG_ERROR, "mdbx_env_chk", 0, "%s() failed, error %s (%d)",
subj, mdbx_strerror(err), err);
return err;
}
__cold static void MDBX_PRINTF_ARGS(5, 6)
chk_object_issue(MDBX_chk_scope_t *const scope, const char *object,
uint64_t entry_number, const char *caption,
const char *extra_fmt, ...) {
MDBX_chk_internal_t *const chk = scope->internal;
MDBX_chk_issue_t *issue = chk->usr->scope->issues;
while (issue) {
if (issue->caption == caption) {
issue->count += 1;
break;
} else
issue = issue->next;
}
const bool fresh = issue == nullptr;
if (fresh) {
issue = osal_malloc(sizeof(*issue));
if (likely(issue)) {
issue->caption = caption;
issue->count = 1;
issue->next = chk->usr->scope->issues;
chk->usr->scope->issues = issue;
} else
chk_error_rc(scope, ENOMEM, "adding issue");
}
va_list args;
va_start(args, extra_fmt);
if (chk->cb->issue) {
mdbx_env_chk_encount_problem(chk->usr);
chk->cb->issue(chk->usr, object, entry_number, caption, extra_fmt, args);
} else {
MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error);
if (entry_number != UINT64_MAX)
chk_print(line, "%s #%" PRIu64 ": %s", object, entry_number, caption);
else
chk_print(line, "%s: %s", object, caption);
if (extra_fmt)
chk_puts(chk_print_va(chk_puts(line, " ("), extra_fmt, args), ")");
chk_line_end(fresh ? chk_flush(line) : line);
}
va_end(args);
}
__cold static void MDBX_PRINTF_ARGS(2, 3)
chk_scope_issue(MDBX_chk_scope_t *const scope, const char *fmt, ...) {
MDBX_chk_internal_t *const chk = scope->internal;
va_list args;
va_start(args, fmt);
if (likely(chk->cb->issue)) {
mdbx_env_chk_encount_problem(chk->usr);
chk->cb->issue(chk->usr, nullptr, 0, nullptr, fmt, args);
} else
chk_line_end(
chk_print_va(chk_line_begin(scope, MDBX_chk_error), fmt, args));
va_end(args);
}
__cold static int chk_scope_end(MDBX_chk_internal_t *chk, int err) {
assert(chk->scope_depth > 0);
MDBX_chk_scope_t *const inner = chk->scope_stack + chk->scope_depth;
MDBX_chk_scope_t *const outer = chk->scope_depth ? inner - 1 : nullptr;
if (!outer || outer->stage != inner->stage) {
if (err == MDBX_SUCCESS && *chk->problem_counter)
err = MDBX_PROBLEM;
else if (*chk->problem_counter == 0 && MDBX_IS_ERROR(err))
*chk->problem_counter = 1;
if (chk->problem_counter != &chk->usr->result.total_problems) {
chk->usr->result.total_problems += *chk->problem_counter;
chk->problem_counter = &chk->usr->result.total_problems;
}
if (chk->cb->stage_end)
err = chk->cb->stage_end(chk->usr, inner->stage, err);
}
if (chk->cb->scope_conclude)
err = chk->cb->scope_conclude(chk->usr, outer, inner, err);
chk->usr->scope = outer;
chk->usr->scope_nesting = chk->scope_depth -= 1;
if (outer)
outer->subtotal_issues += inner->subtotal_issues;
if (chk->cb->scope_pop)
chk->cb->scope_pop(chk->usr, outer, inner);
while (inner->issues) {
MDBX_chk_issue_t *next = inner->issues->next;
osal_free(inner->issues);
inner->issues = next;
}
memset(inner, -1, sizeof(*inner));
return err;
}
__cold static int chk_scope_begin_args(MDBX_chk_internal_t *chk,
int verbosity_adjustment,
enum MDBX_chk_stage stage,
const void *object, size_t *problems,
const char *fmt, va_list args) {
if (unlikely(chk->scope_depth + 1u >= ARRAY_LENGTH(chk->scope_stack)))
return MDBX_BACKLOG_DEPLETED;
MDBX_chk_scope_t *const outer = chk->scope_stack + chk->scope_depth;
const int verbosity =
outer->verbosity +
(verbosity_adjustment - 1) * (1 << MDBX_chk_severity_prio_shift);
MDBX_chk_scope_t *const inner = outer + 1;
memset(inner, 0, sizeof(*inner));
inner->internal = outer->internal;
inner->stage = stage ? stage : (stage = outer->stage);
inner->object = object;
inner->verbosity = (verbosity < MDBX_chk_warning)
? MDBX_chk_warning
: (enum MDBX_chk_severity)verbosity;
if (problems)
chk->problem_counter = problems;
else if (!chk->problem_counter || outer->stage != stage)
chk->problem_counter = &chk->usr->result.total_problems;
if (chk->cb->scope_push) {
const int err = chk->cb->scope_push(chk->usr, outer, inner, fmt, args);
if (unlikely(err != MDBX_SUCCESS))
return err;
}
chk->usr->scope = inner;
chk->usr->scope_nesting = chk->scope_depth += 1;
if (stage != outer->stage && chk->cb->stage_begin) {
int err = chk->cb->stage_begin(chk->usr, stage);
if (unlikely(err != MDBX_SUCCESS)) {
err = chk_scope_end(chk, err);
assert(err != MDBX_SUCCESS);
return err ? err : MDBX_RESULT_TRUE;
}
}
return MDBX_SUCCESS;
}
__cold static int MDBX_PRINTF_ARGS(6, 7)
chk_scope_begin(MDBX_chk_internal_t *chk, int verbosity_adjustment,
enum MDBX_chk_stage stage, const void *object,
size_t *problems, const char *fmt, ...) {
va_list args;
va_start(args, fmt);
int rc = chk_scope_begin_args(chk, verbosity_adjustment, stage, object,
problems, fmt, args);
va_end(args);
return rc;
}
__cold static int chk_scope_restore(MDBX_chk_scope_t *const target, int err) {
MDBX_chk_internal_t *const chk = target->internal;
assert(target <= chk->usr->scope);
while (chk->usr->scope > target)
err = chk_scope_end(chk, err);
return err;
}
__cold void chk_scope_pop(MDBX_chk_scope_t *const inner) {
if (inner && inner > inner->internal->scope_stack)
chk_scope_restore(inner - 1, MDBX_SUCCESS);
}
__cold static MDBX_chk_scope_t *MDBX_PRINTF_ARGS(3, 4)
chk_scope_push(MDBX_chk_scope_t *const scope, int verbosity_adjustment,
const char *fmt, ...) {
chk_scope_restore(scope, MDBX_SUCCESS);
va_list args;
va_start(args, fmt);
int err = chk_scope_begin_args(scope->internal, verbosity_adjustment,
scope->stage, nullptr, nullptr, fmt, args);
va_end(args);
return err ? nullptr : scope + 1;
}
__cold static const char *chk_v2a(MDBX_chk_internal_t *chk,
const MDBX_val *val) {
if (val == MDBX_CHK_MAIN)
return "@MAIN";
if (val == MDBX_CHK_GC)
return "@GC";
if (val == MDBX_CHK_META)
return "@META";
const unsigned char *const data = val->iov_base;
const size_t len = val->iov_len;
if (data == MDBX_CHK_MAIN)
return "@MAIN";
if (data == MDBX_CHK_GC)
return "@GC";
if (data == MDBX_CHK_META)
return "@META";
if (!len)
return "<zero-length>";
if (!data)
return "<nullptr>";
if (len > 65536) {
const size_t enough = 42;
if (chk->v2a_buf.iov_len < enough) {
void *ptr = osal_realloc(chk->v2a_buf.iov_base, enough);
if (unlikely(!ptr))
return "<out-of-memory>";
chk->v2a_buf.iov_base = ptr;
chk->v2a_buf.iov_len = enough;
}
snprintf(chk->v2a_buf.iov_base, chk->v2a_buf.iov_len,
"<too-long.%" PRIuSIZE ">", len);
return chk->v2a_buf.iov_base;
}
bool printable = true;
bool quoting = false;
size_t xchars = 0;
for (size_t i = 0; i < len && printable; ++i) {
quoting = quoting || !(data[i] == '_' || isalnum(data[i]));
printable =
isprint(data[i]) || (data[i] < ' ' && ++xchars < 4 && len > xchars * 4);
}
size_t need = len + 1;
if (quoting || !printable)
need += len + /* quotes */ 2 + 2 * /* max xchars */ 4;
if (need > chk->v2a_buf.iov_len) {
void *ptr = osal_realloc(chk->v2a_buf.iov_base, need);
if (unlikely(!ptr))
return "<out-of-memory>";
chk->v2a_buf.iov_base = ptr;
chk->v2a_buf.iov_len = need;
}
static const char hex[] = "0123456789abcdef";
char *w = chk->v2a_buf.iov_base;
if (!quoting) {
memcpy(w, data, len);
w += len;
} else if (printable) {
*w++ = '\'';
for (size_t i = 0; i < len; ++i) {
if (data[i] < ' ') {
assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 4);
w[0] = '\\';
w[1] = 'x';
w[2] = hex[data[i] >> 4];
w[3] = hex[data[i] & 15];
w += 4;
} else if (strchr("\"'`\\", data[i])) {
assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 2);
w[0] = '\\';
w[1] = data[i];
w += 2;
} else {
assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 1);
*w++ = data[i];
}
}
*w++ = '\'';
} else {
*w++ = '\\';
*w++ = 'x';
for (size_t i = 0; i < len; ++i) {
assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 2);
w[0] = hex[data[i] >> 4];
w[1] = hex[data[i] & 15];
w += 2;
}
}
assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w);
*w = 0;
return chk->v2a_buf.iov_base;
}
__cold static void chk_dispose(MDBX_chk_internal_t *chk) {
assert(chk->table[FREE_DBI] == &chk->table_gc);
assert(chk->table[MAIN_DBI] == &chk->table_main);
for (size_t i = 0; i < ARRAY_LENGTH(chk->table); ++i) {
MDBX_chk_table_t *const sdb = chk->table[i];
if (sdb) {
chk->table[i] = nullptr;
if (chk->cb->table_dispose && sdb->cookie) {
chk->cb->table_dispose(chk->usr, sdb);
sdb->cookie = nullptr;
}
if (sdb != &chk->table_gc && sdb != &chk->table_main) {
osal_free(sdb);
}
}
}
osal_free(chk->v2a_buf.iov_base);
osal_free(chk->pagemap);
chk->usr->internal = nullptr;
chk->usr->scope = nullptr;
chk->pagemap = nullptr;
memset(chk, 0xDD, sizeof(*chk));
osal_free(chk);
}
static size_t div_8s(size_t numerator, size_t divider) {
assert(numerator <= (SIZE_MAX >> 8));
return (numerator << 8) / divider;
}
static size_t mul_8s(size_t quotient, size_t multiplier) {
size_t hi = multiplier * (quotient >> 8);
size_t lo = multiplier * (quotient & 255) + 128;
return hi + (lo >> 8);
}
static void histogram_reduce(struct MDBX_chk_histogram *p) {
const size_t size = ARRAY_LENGTH(p->ranges), last = size - 1;
// ищем пару для слияния с минимальной ошибкой
size_t min_err = SIZE_MAX, min_i = last - 1;
for (size_t i = 0; i < last; ++i) {
const size_t b1 = p->ranges[i].begin, e1 = p->ranges[i].end,
s1 = p->ranges[i].amount;
const size_t b2 = p->ranges[i + 1].begin, e2 = p->ranges[i + 1].end,
s2 = p->ranges[i + 1].amount;
const size_t l1 = e1 - b1, l2 = e2 - b2, lx = e2 - b1, sx = s1 + s2;
assert(s1 > 0 && b1 > 0 && b1 < e1);
assert(s2 > 0 && b2 > 0 && b2 < e2);
assert(e1 <= b2);
// за ошибку принимаем площадь изменений на гистограмме при слиянии
const size_t h1 = div_8s(s1, l1), h2 = div_8s(s2, l2), hx = div_8s(sx, lx);
const size_t d1 = mul_8s((h1 > hx) ? h1 - hx : hx - h1, l1);
const size_t d2 = mul_8s((h2 > hx) ? h2 - hx : hx - h2, l2);
const size_t dx = mul_8s(hx, b2 - e1);
const size_t err = d1 + d2 + dx;
if (min_err >= err) {
min_i = i;
min_err = err;
}
}
// объединяем
p->ranges[min_i].end = p->ranges[min_i + 1].end;
p->ranges[min_i].amount += p->ranges[min_i + 1].amount;
p->ranges[min_i].count += p->ranges[min_i + 1].count;
if (min_i < last)
// перемещаем хвост
memmove(p->ranges + min_i, p->ranges + min_i + 1,
(last - min_i) * sizeof(p->ranges[0]));
// обнуляем последний элемент и продолжаем
p->ranges[last].count = 0;
}
static void histogram_acc(const size_t n, struct MDBX_chk_histogram *p) {
STATIC_ASSERT(ARRAY_LENGTH(p->ranges) > 2);
p->amount += n;
p->count += 1;
if (likely(n < 2)) {
p->ones += n;
p->pad += 1;
} else
for (;;) {
const size_t size = ARRAY_LENGTH(p->ranges), last = size - 1;
size_t i = 0;
while (i < size && p->ranges[i].count && n >= p->ranges[i].begin) {
if (n < p->ranges[i].end) {
// значение попадает в существующий интервал
p->ranges[i].amount += n;
p->ranges[i].count += 1;
return;
}
++i;
}
if (p->ranges[last].count == 0) {
// использованы еще не все слоты, добавляем интервал
assert(i < size);
if (p->ranges[i].count) {
// раздвигаем
assert(i < last);
#ifdef __COVERITY__
if (i < last) /* avoid Coverity false-positive issue */
#endif /* __COVERITY__ */
memmove(p->ranges + i + 1, p->ranges + i,
(last - i) * sizeof(p->ranges[0]));
}
p->ranges[i].begin = n;
p->ranges[i].end = n + 1;
p->ranges[i].amount = n;
p->ranges[i].count = 1;
return;
}
histogram_reduce(p);
}
}
__cold static MDBX_chk_line_t *
histogram_dist(MDBX_chk_line_t *line,
const struct MDBX_chk_histogram *histogram, const char *prefix,
const char *first, bool amount) {
line = chk_print(line, "%s:", prefix);
const char *comma = "";
const size_t first_val = amount ? histogram->ones : histogram->pad;
if (first_val) {
chk_print(line, " %s=%" PRIuSIZE, first, first_val);
comma = ",";
}
for (size_t n = 0; n < ARRAY_LENGTH(histogram->ranges); ++n)
if (histogram->ranges[n].count) {
chk_print(line, "%s %" PRIuSIZE, comma, histogram->ranges[n].begin);
if (histogram->ranges[n].begin != histogram->ranges[n].end - 1)
chk_print(line, "-%" PRIuSIZE, histogram->ranges[n].end - 1);
line = chk_print(line, "=%" PRIuSIZE,
amount ? histogram->ranges[n].amount
: histogram->ranges[n].count);
comma = ",";
}
return line;
}
__cold static MDBX_chk_line_t *
histogram_print(MDBX_chk_scope_t *scope, MDBX_chk_line_t *line,
const struct MDBX_chk_histogram *histogram, const char *prefix,
const char *first, bool amount) {
if (histogram->count) {
line = chk_print(line, "%s %" PRIuSIZE, prefix,
amount ? histogram->amount : histogram->count);
if (scope->verbosity > MDBX_chk_info)
line = chk_puts(
histogram_dist(line, histogram, " (distribution", first, amount),
")");
}
return line;
}
//-----------------------------------------------------------------------------
__cold static int chk_get_sdb(MDBX_chk_scope_t *const scope,
const walk_sdb_t *in, MDBX_chk_table_t **out) {
MDBX_chk_internal_t *const chk = scope->internal;
if (chk->last_lookup &&
chk->last_lookup->name.iov_base == in->name.iov_base) {
*out = chk->last_lookup;
return MDBX_SUCCESS;
}
for (size_t i = 0; i < ARRAY_LENGTH(chk->table); ++i) {
MDBX_chk_table_t *sdb = chk->table[i];
if (!sdb) {
sdb = osal_calloc(1, sizeof(MDBX_chk_table_t));
if (unlikely(!sdb)) {
*out = nullptr;
return chk_error_rc(scope, MDBX_ENOMEM, "alloc_table");
}
chk->table[i] = sdb;
sdb->flags = in->internal->flags;
sdb->id = -1;
sdb->name = in->name;
}
if (sdb->name.iov_base == in->name.iov_base) {
if (sdb->id < 0) {
sdb->id = (int)i;
sdb->cookie =
chk->cb->table_filter
? chk->cb->table_filter(chk->usr, &sdb->name, sdb->flags)
: (void *)(intptr_t)-1;
}
*out = (chk->last_lookup = sdb);
return MDBX_SUCCESS;
}
}
chk_scope_issue(scope, "too many tables > %u",
(unsigned)ARRAY_LENGTH(chk->table) - CORE_DBS - /* meta */ 1);
*out = nullptr;
return MDBX_PROBLEM;
}
//------------------------------------------------------------------------------
__cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope,
const unsigned num) {
MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_verbose);
MDBX_chk_internal_t *const chk = scope->internal;
if (line) {
MDBX_env *const env = chk->usr->env;
const bool have_bootid = (chk->envinfo.mi_bootid.current.x |
chk->envinfo.mi_bootid.current.y) != 0;
const bool bootid_match =
have_bootid && memcmp(&chk->envinfo.mi_bootid.meta[num],
&chk->envinfo.mi_bootid.current,
sizeof(chk->envinfo.mi_bootid.current)) == 0;
const char *status = "stay";
if (num == chk->troika.recent)
status = "head";
else if (num == TROIKA_TAIL(&chk->troika))
status = "tail";
line = chk_print(line, "meta-%u: %s, ", num, status);
switch (chk->envinfo.mi_meta_sign[num]) {
case DATASIGN_NONE:
line = chk_puts(line, "no-sync/legacy");
break;
case DATASIGN_WEAK:
line = chk_print(line, "weak-%s",
have_bootid
? (bootid_match ? "intact (same boot-id)" : "dead")
: "unknown (no boot-id)");
break;
default:
line = chk_puts(line, "steady");
break;
}
const txnid_t meta_txnid = chk->envinfo.mi_meta_txnid[num];
line = chk_print(line, " txn#%" PRIaTXN ", ", meta_txnid);
if (chk->envinfo.mi_bootid.meta[num].x | chk->envinfo.mi_bootid.meta[num].y)
line = chk_print(line, "boot-id %" PRIx64 "-%" PRIx64 " (%s)",
chk->envinfo.mi_bootid.meta[num].x,
chk->envinfo.mi_bootid.meta[num].y,
bootid_match ? "live" : "not match");
else
line = chk_puts(line, "no boot-id");
if (env->stuck_meta >= 0) {
if (num == (unsigned)env->stuck_meta)
line = chk_print(line, ", %s", "forced for checking");
} else if (meta_txnid > chk->envinfo.mi_recent_txnid &&
(env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE)
line = chk_print(line,
", rolled-back %" PRIu64 " commit(s) (%" PRIu64
" >>> %" PRIu64 ")",
meta_txnid - chk->envinfo.mi_recent_txnid, meta_txnid,
chk->envinfo.mi_recent_txnid);
chk_line_end(line);
}
}
__cold static int
chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx,
const int deep, const walk_sdb_t *sdb_info,
const size_t page_size, const page_type_t pagetype,
const MDBX_error_t page_err, const size_t nentries,
const size_t payload_bytes, const size_t header_bytes,
const size_t unused_bytes) {
MDBX_chk_scope_t *const scope = ctx;
MDBX_chk_internal_t *const chk = scope->internal;
MDBX_chk_context_t *const usr = chk->usr;
MDBX_env *const env = usr->env;
MDBX_chk_table_t *sdb;
int err = chk_get_sdb(scope, sdb_info, &sdb);
if (unlikely(err))
return err;
if (deep > 42) {
chk_scope_issue(scope, "too deeply %u", deep);
return MDBX_CORRUPTED /* avoid infinite loop/recursion */;
}
histogram_acc(deep, &sdb->histogram.deep);
usr->result.processed_pages += npages;
const size_t page_bytes = payload_bytes + header_bytes + unused_bytes;
int height = deep + 1;
if (sdb->id >= CORE_DBS)
height -= usr->txn->dbs[MAIN_DBI].height;
const tree_t *nested = sdb_info->nested;
if (nested) {
if (sdb->flags & MDBX_DUPSORT)
height -= sdb_info->internal->height;
else {
chk_object_issue(scope, "nested tree", pgno, "unexpected",
"table %s flags 0x%x, deep %i", chk_v2a(chk, &sdb->name),
sdb->flags, deep);
nested = nullptr;
}
} else
chk->last_nested = nullptr;
const char *pagetype_caption;
bool branch = false;
switch (pagetype) {
default:
chk_object_issue(scope, "page", pgno, "unknown page-type",
"type %u, deep %i", (unsigned)pagetype, deep);
pagetype_caption = "unknown";
sdb->pages.other += npages;
break;
case page_broken:
assert(page_err != MDBX_SUCCESS);
pagetype_caption = "broken";
sdb->pages.other += npages;
break;
case page_sub_broken:
assert(page_err != MDBX_SUCCESS);
pagetype_caption = "broken-subpage";
sdb->pages.other += npages;
break;
case page_large:
pagetype_caption = "large";
histogram_acc(npages, &sdb->histogram.large_pages);
if (sdb->flags & MDBX_DUPSORT)
chk_object_issue(scope, "page", pgno, "unexpected",
"type %u, table %s flags 0x%x, deep %i",
(unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags,
deep);
break;
case page_branch:
branch = true;
if (!nested) {
pagetype_caption = "branch";
sdb->pages.branch += 1;
} else {
pagetype_caption = "nested-branch";
sdb->pages.nested_branch += 1;
}
break;
case page_dupfix_leaf:
if (!nested)
chk_object_issue(scope, "page", pgno, "unexpected",
"type %u, table %s flags 0x%x, deep %i",
(unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags,
deep);
/* fall through */
__fallthrough;
case page_leaf:
if (!nested) {
pagetype_caption = "leaf";
sdb->pages.leaf += 1;
if (height != sdb_info->internal->height)
chk_object_issue(scope, "page", pgno, "wrong tree height",
"actual %i != %i table %s", height,
sdb_info->internal->height, chk_v2a(chk, &sdb->name));
} else {
pagetype_caption =
(pagetype == page_leaf) ? "nested-leaf" : "nested-leaf-dupfix";
sdb->pages.nested_leaf += 1;
if (chk->last_nested != nested) {
histogram_acc(height, &sdb->histogram.nested_tree);
chk->last_nested = nested;
}
if (height != nested->height)
chk_object_issue(scope, "page", pgno, "wrong nested-tree height",
"actual %i != %i dupsort-node %s", height,
nested->height, chk_v2a(chk, &sdb->name));
}
break;
case page_sub_dupfix_leaf:
case page_sub_leaf:
pagetype_caption =
(pagetype == page_sub_leaf) ? "subleaf-dupsort" : "subleaf-dupfix";
sdb->pages.nested_subleaf += 1;
if ((sdb->flags & MDBX_DUPSORT) == 0 || nested)
chk_object_issue(scope, "page", pgno, "unexpected",
"type %u, table %s flags 0x%x, deep %i",
(unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags,
deep);
break;
}
if (npages) {
if (sdb->cookie) {
MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra);
if (npages == 1)
chk_print(line, "%s-page %" PRIuSIZE, pagetype_caption, pgno);
else
chk_print(line, "%s-span %" PRIuSIZE "[%u]", pagetype_caption, pgno,
npages);
chk_line_end(chk_print(line,
" of %s: header %" PRIiPTR ", %s %" PRIiPTR
", payload %" PRIiPTR ", unused %" PRIiPTR
", deep %i",
chk_v2a(chk, &sdb->name), header_bytes,
(pagetype == page_branch) ? "keys" : "entries",
nentries, payload_bytes, unused_bytes, deep));
}
bool already_used = false;
for (unsigned n = 0; n < npages; ++n) {
const size_t spanpgno = pgno + n;
if (spanpgno >= usr->result.alloc_pages) {
chk_object_issue(scope, "page", spanpgno, "wrong page-no",
"%s-page: %" PRIuSIZE " > %" PRIuSIZE ", deep %i",
pagetype_caption, spanpgno, usr->result.alloc_pages,
deep);
sdb->pages.all += 1;
} else if (chk->pagemap[spanpgno]) {
const MDBX_chk_table_t *const rival =
chk->table[chk->pagemap[spanpgno] - 1];
chk_object_issue(scope, "page", spanpgno,
(branch && rival == sdb) ? "loop" : "already used",
"%s-page: by %s, deep %i", pagetype_caption,
chk_v2a(chk, &rival->name), deep);
already_used = true;
} else {
chk->pagemap[spanpgno] = (int16_t)sdb->id + 1;
sdb->pages.all += 1;
}
}
if (already_used)
return branch ? MDBX_RESULT_TRUE /* avoid infinite loop/recursion */
: MDBX_SUCCESS;
}
if (MDBX_IS_ERROR(page_err)) {
chk_object_issue(scope, "page", pgno, "invalid/corrupted", "%s-page",
pagetype_caption);
} else {
if (unused_bytes > page_size)
chk_object_issue(scope, "page", pgno, "illegal unused-bytes",
"%s-page: %u < %" PRIuSIZE " < %u", pagetype_caption, 0,
unused_bytes, env->ps);
if (header_bytes < (int)sizeof(long) ||
(size_t)header_bytes >= env->ps - sizeof(long)) {
chk_object_issue(scope, "page", pgno, "illegal header-length",
"%s-page: %" PRIuSIZE " < %" PRIuSIZE " < %" PRIuSIZE,
pagetype_caption, sizeof(long), header_bytes,
env->ps - sizeof(long));
}
if (nentries < 1 || (pagetype == page_branch && nentries < 2)) {
chk_object_issue(scope, "page", pgno, nentries ? "half-empty" : "empty",
"%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE
" entries, deep %i",
pagetype_caption, payload_bytes, nentries, deep);
sdb->pages.empty += 1;
}
if (npages) {
if (page_bytes != page_size) {
chk_object_issue(scope, "page", pgno, "misused",
"%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR
"h + %" PRIuPTR "p + %" PRIuPTR "u), deep %i",
pagetype_caption, page_size, page_bytes, header_bytes,
payload_bytes, unused_bytes, deep);
if (page_size > page_bytes)
sdb->lost_bytes += page_size - page_bytes;
} else {
sdb->payload_bytes += payload_bytes + header_bytes;
usr->result.total_payload_bytes += payload_bytes + header_bytes;
}
}
}
return chk_check_break(scope);
}
__cold static int chk_tree(MDBX_chk_scope_t *const scope) {
MDBX_chk_internal_t *const chk = scope->internal;
MDBX_chk_context_t *const usr = chk->usr;
MDBX_env *const env = usr->env;
MDBX_txn *const txn = usr->txn;
#if defined(_WIN32) || defined(_WIN64)
SetLastError(ERROR_SUCCESS);
#else
errno = 0;
#endif /* Windows */
chk->pagemap = osal_calloc(usr->result.alloc_pages, sizeof(*chk->pagemap));
if (!chk->pagemap) {
int err = osal_get_errno();
return chk_error_rc(scope, err ? err : MDBX_ENOMEM, "calloc");
}
if (scope->verbosity > MDBX_chk_info)
chk_scope_push(scope, 0, "Walking pages...");
/* always skip key ordering checking
* to avoid MDBX_CORRUPTED in case custom comparators were used */
usr->result.processed_pages = NUM_METAS;
int err = walk_pages(txn, chk_pgvisitor, scope, dont_check_keys_ordering);
if (MDBX_IS_ERROR(err) && err != MDBX_EINTR)
chk_error_rc(scope, err, "walk_pages");
for (size_t n = NUM_METAS; n < usr->result.alloc_pages; ++n)
if (!chk->pagemap[n])
usr->result.unused_pages += 1;
MDBX_chk_table_t total;
memset(&total, 0, sizeof(total));
total.pages.all = NUM_METAS;
for (size_t i = 0; i < ARRAY_LENGTH(chk->table) && chk->table[i]; ++i) {
MDBX_chk_table_t *const sdb = chk->table[i];
total.payload_bytes += sdb->payload_bytes;
total.lost_bytes += sdb->lost_bytes;
total.pages.all += sdb->pages.all;
total.pages.empty += sdb->pages.empty;
total.pages.other += sdb->pages.other;
total.pages.branch += sdb->pages.branch;
total.pages.leaf += sdb->pages.leaf;
total.pages.nested_branch += sdb->pages.nested_branch;
total.pages.nested_leaf += sdb->pages.nested_leaf;
total.pages.nested_subleaf += sdb->pages.nested_subleaf;
}
assert(total.pages.all == usr->result.processed_pages);
const size_t total_page_bytes = pgno2bytes(env, total.pages.all);
if (usr->scope->subtotal_issues || usr->scope->verbosity >= MDBX_chk_verbose)
chk_line_end(chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution),
"walked %zu pages, left/unused %zu"
", %" PRIuSIZE " problem(s)",
usr->result.processed_pages,
usr->result.unused_pages,
usr->scope->subtotal_issues));
err = chk_scope_restore(scope, err);
if (scope->verbosity > MDBX_chk_info) {
for (size_t i = 0; i < ARRAY_LENGTH(chk->table) && chk->table[i]; ++i) {
MDBX_chk_table_t *const sdb = chk->table[i];
MDBX_chk_scope_t *inner =
chk_scope_push(scope, 0, "tree %s:", chk_v2a(chk, &sdb->name));
if (sdb->pages.all == 0)
chk_line_end(
chk_print(chk_line_begin(inner, MDBX_chk_resolution), "empty"));
else {
MDBX_chk_line_t *line = chk_line_begin(inner, MDBX_chk_info);
if (line) {
line = chk_print(line, "page usage: subtotal %" PRIuSIZE,
sdb->pages.all);
const size_t branch_pages =
sdb->pages.branch + sdb->pages.nested_branch;
const size_t leaf_pages = sdb->pages.leaf + sdb->pages.nested_leaf +
sdb->pages.nested_subleaf;
if (sdb->pages.other)
line = chk_print(line, ", other %" PRIuSIZE, sdb->pages.other);
if (sdb->pages.other == 0 ||
(branch_pages | leaf_pages | sdb->histogram.large_pages.count) !=
0) {
line = chk_print(line, ", branch %" PRIuSIZE ", leaf %" PRIuSIZE,
branch_pages, leaf_pages);
if (sdb->histogram.large_pages.count ||
(sdb->flags & MDBX_DUPSORT) == 0) {
line = chk_print(line, ", large %" PRIuSIZE,
sdb->histogram.large_pages.count);
if (sdb->histogram.large_pages.amount |
sdb->histogram.large_pages.count)
line = histogram_print(inner, line, &sdb->histogram.large_pages,
" amount", "single", true);
}
}
line = histogram_dist(chk_line_feed(line), &sdb->histogram.deep,
"tree deep density", "1", false);
if (sdb != &chk->table_gc && sdb->histogram.nested_tree.count) {
line = chk_print(chk_line_feed(line), "nested tree(s) %" PRIuSIZE,
sdb->histogram.nested_tree.count);
line = histogram_dist(line, &sdb->histogram.nested_tree, " density",
"1", false);
line = chk_print(chk_line_feed(line),
"nested tree(s) pages %" PRIuSIZE
": branch %" PRIuSIZE ", leaf %" PRIuSIZE
", subleaf %" PRIuSIZE,
sdb->pages.nested_branch + sdb->pages.nested_leaf,
sdb->pages.nested_branch, sdb->pages.nested_leaf,
sdb->pages.nested_subleaf);
}
const size_t bytes = pgno2bytes(env, sdb->pages.all);
line = chk_print(
chk_line_feed(line),
"page filling: subtotal %" PRIuSIZE
" bytes (%.1f%%), payload %" PRIuSIZE
" (%.1f%%), unused %" PRIuSIZE " (%.1f%%)",
bytes, bytes * 100.0 / total_page_bytes, sdb->payload_bytes,
sdb->payload_bytes * 100.0 / bytes, bytes - sdb->payload_bytes,
(bytes - sdb->payload_bytes) * 100.0 / bytes);
if (sdb->pages.empty)
line = chk_print(line, ", %" PRIuSIZE " empty pages",
sdb->pages.empty);
if (sdb->lost_bytes)
line =
chk_print(line, ", %" PRIuSIZE " bytes lost", sdb->lost_bytes);
chk_line_end(line);
}
}
chk_scope_restore(scope, 0);
}
}
MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution);
line = chk_print(line,
"summary: total %" PRIuSIZE " bytes, payload %" PRIuSIZE
" (%.1f%%), unused %" PRIuSIZE " (%.1f%%),"
" average fill %.1f%%",
total_page_bytes, usr->result.total_payload_bytes,
usr->result.total_payload_bytes * 100.0 / total_page_bytes,
total_page_bytes - usr->result.total_payload_bytes,
(total_page_bytes - usr->result.total_payload_bytes) *
100.0 / total_page_bytes,
usr->result.total_payload_bytes * 100.0 / total_page_bytes);
if (total.pages.empty)
line = chk_print(line, ", %" PRIuSIZE " empty pages", total.pages.empty);
if (total.lost_bytes)
line = chk_print(line, ", %" PRIuSIZE " bytes lost", total.lost_bytes);
chk_line_end(line);
return err;
}
typedef int(chk_kv_visitor)(MDBX_chk_scope_t *const scope,
MDBX_chk_table_t *sdb, const size_t record_number,
const MDBX_val *key, const MDBX_val *data);
__cold static int chk_handle_kv(MDBX_chk_scope_t *const scope,
MDBX_chk_table_t *sdb,
const size_t record_number, const MDBX_val *key,
const MDBX_val *data) {
MDBX_chk_internal_t *const chk = scope->internal;
int err = MDBX_SUCCESS;
assert(sdb->cookie);
if (chk->cb->table_handle_kv)
err = chk->cb->table_handle_kv(chk->usr, sdb, record_number, key, data);
return err ? err : chk_check_break(scope);
}
__cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi,
MDBX_chk_table_t *sdb, chk_kv_visitor *handler) {
MDBX_chk_internal_t *const chk = scope->internal;
MDBX_chk_context_t *const usr = chk->usr;
MDBX_env *const env = usr->env;
MDBX_txn *const txn = usr->txn;
MDBX_cursor *cursor = nullptr;
size_t record_count = 0, dups = 0, sub_databases = 0;
int err;
if ((MDBX_TXN_FINISHED | MDBX_TXN_ERROR) & txn->flags) {
chk_line_end(
chk_flush(chk_print(chk_line_begin(scope, MDBX_chk_error),
"abort processing %s due to a previous error",
chk_v2a(chk, &sdb->name))));
err = MDBX_BAD_TXN;
goto bailout;
}
if (0 > (int)dbi) {
err = dbi_open(
txn, &sdb->name, MDBX_DB_ACCEDE, &dbi,
(chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr,
(chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr);
if (unlikely(err)) {
tASSERT(txn, dbi >= txn->env->n_dbi ||
(txn->env->dbs_flags[dbi] & DB_VALID) == 0);
chk_error_rc(scope, err, "mdbx_dbi_open");
goto bailout;
}
tASSERT(txn, dbi < txn->env->n_dbi &&
(txn->env->dbs_flags[dbi] & DB_VALID) != 0);
}
const tree_t *const db = txn->dbs + dbi;
if (handler) {
const char *key_mode = nullptr;
switch (sdb->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) {
case 0:
key_mode = "usual";
break;
case MDBX_REVERSEKEY:
key_mode = "reserve";
break;
case MDBX_INTEGERKEY:
key_mode = "ordinal";
break;
case MDBX_REVERSEKEY | MDBX_INTEGERKEY:
key_mode = "msgpack";
break;
default:
key_mode = "inconsistent";
chk_scope_issue(scope, "wrong key-mode (0x%x)",
sdb->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY));
}
const char *value_mode = nullptr;
switch (sdb->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED |
MDBX_INTEGERDUP)) {
case 0:
value_mode = "single";
break;
case MDBX_DUPSORT:
value_mode = "multi";
break;
case MDBX_DUPSORT | MDBX_REVERSEDUP:
value_mode = "multi-reverse";
break;
case MDBX_DUPSORT | MDBX_DUPFIXED:
value_mode = "multi-samelength";
break;
case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP:
value_mode = "multi-reverse-samelength";
break;
case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP:
value_mode = "multi-ordinal";
break;
case MDBX_DUPSORT | MDBX_INTEGERDUP | MDBX_REVERSEDUP:
value_mode = "multi-msgpack";
break;
case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP:
value_mode = "reserved";
break;
default:
value_mode = "inconsistent";
chk_scope_issue(scope, "wrong value-mode (0x%x)",
sdb->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP |
MDBX_DUPFIXED | MDBX_INTEGERDUP));
}
MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info);
line = chk_print(line, "key-value kind: %s-key => %s-value", key_mode,
value_mode);
line = chk_print(line, ", flags:");
if (!sdb->flags)
line = chk_print(line, " none");
else {
const uint8_t f[] = {MDBX_DUPSORT,
MDBX_INTEGERKEY,
MDBX_REVERSEKEY,
MDBX_DUPFIXED,
MDBX_REVERSEDUP,
MDBX_INTEGERDUP,
0};
const char *const t[] = {"dupsort", "integerkey", "reversekey",
"dupfix", "reversedup", "integerdup"};
for (size_t i = 0; f[i]; i++)
if (sdb->flags & f[i])
line = chk_print(line, " %s", t[i]);
}
chk_line_end(chk_print(line, " (0x%02X)", sdb->flags));
line = chk_print(chk_line_begin(scope, MDBX_chk_verbose),
"entries %" PRIu64 ", sequence %" PRIu64, db->items,
db->sequence);
if (db->mod_txnid)
line =
chk_print(line, ", last modification txn#%" PRIaTXN, db->mod_txnid);
if (db->root != P_INVALID)
line = chk_print(line, ", root #%" PRIaPGNO, db->root);
chk_line_end(line);
chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_verbose),
"b-tree depth %u, pages: branch %" PRIaPGNO
", leaf %" PRIaPGNO ", large %" PRIaPGNO,
db->height, db->branch_pages, db->leaf_pages,
db->large_pages));
if ((chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) {
const size_t branch_pages = sdb->pages.branch + sdb->pages.nested_branch;
const size_t leaf_pages = sdb->pages.leaf + sdb->pages.nested_leaf;
const size_t subtotal_pages =
db->branch_pages + db->leaf_pages + db->large_pages;
if (subtotal_pages != sdb->pages.all)
chk_scope_issue(
scope, "%s pages mismatch (%" PRIuSIZE " != walked %" PRIuSIZE ")",
"subtotal", subtotal_pages, sdb->pages.all);
if (db->branch_pages != branch_pages)
chk_scope_issue(
scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")",
"branch", db->branch_pages, branch_pages);
if (db->leaf_pages != leaf_pages)
chk_scope_issue(
scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")",
"all-leaf", db->leaf_pages, leaf_pages);
if (db->large_pages != sdb->histogram.large_pages.amount)
chk_scope_issue(
scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")",
"large/overlow", db->large_pages,
sdb->histogram.large_pages.amount);
}
}
err = mdbx_cursor_open(txn, dbi, &cursor);
if (unlikely(err)) {
chk_error_rc(scope, err, "mdbx_cursor_open");
goto bailout;
}
if (chk->flags & MDBX_CHK_IGNORE_ORDER) {
cursor->checking |= z_ignord | z_pagecheck;
if (cursor->subcur)
cursor->subcur->cursor.checking |= z_ignord | z_pagecheck;
}
const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, sdb->flags);
MDBX_val prev_key = {nullptr, 0}, prev_data = {nullptr, 0};
MDBX_val key, data;
err = mdbx_cursor_get(cursor, &key, &data, MDBX_FIRST);
while (err == MDBX_SUCCESS) {
err = chk_check_break(scope);
if (unlikely(err))
goto bailout;
bool bad_key = false;
if (key.iov_len > maxkeysize) {
chk_object_issue(scope, "entry", record_count,
"key length exceeds max-key-size",
"%" PRIuPTR " > %" PRIuPTR, key.iov_len, maxkeysize);
bad_key = true;
} else if ((sdb->flags & MDBX_INTEGERKEY) && key.iov_len != 8 &&
key.iov_len != 4) {
chk_object_issue(scope, "entry", record_count, "wrong key length",
"%" PRIuPTR " != 4or8", key.iov_len);
bad_key = true;
}
bool bad_data = false;
if ((sdb->flags & MDBX_INTEGERDUP) && data.iov_len != 8 &&
data.iov_len != 4) {
chk_object_issue(scope, "entry", record_count, "wrong data length",
"%" PRIuPTR " != 4or8", data.iov_len);
bad_data = true;
}
if (prev_key.iov_base) {
if (prev_data.iov_base && !bad_data && (sdb->flags & MDBX_DUPFIXED) &&
prev_data.iov_len != data.iov_len) {
chk_object_issue(scope, "entry", record_count, "different data length",
"%" PRIuPTR " != %" PRIuPTR, prev_data.iov_len,
data.iov_len);
bad_data = true;
}
if (!bad_key) {
int cmp = mdbx_cmp(txn, dbi, &key, &prev_key);
if (cmp == 0) {
++dups;
if ((sdb->flags & MDBX_DUPSORT) == 0) {
chk_object_issue(scope, "entry", record_count, "duplicated entries",
nullptr);
if (prev_data.iov_base && data.iov_len == prev_data.iov_len &&
memcmp(data.iov_base, prev_data.iov_base, data.iov_len) == 0)
chk_object_issue(scope, "entry", record_count,
"complete duplicate", nullptr);
} else if (!bad_data && prev_data.iov_base) {
cmp = mdbx_dcmp(txn, dbi, &data, &prev_data);
if (cmp == 0)
chk_object_issue(scope, "entry", record_count,
"complete duplicate", nullptr);
else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER))
chk_object_issue(scope, "entry", record_count,
"wrong order of multi-values", nullptr);
}
} else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER))
chk_object_issue(scope, "entry", record_count,
"wrong order of entries", nullptr);
}
}
if (!bad_key) {
if (!prev_key.iov_base && (sdb->flags & MDBX_INTEGERKEY))
chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info),
"fixed key-size %" PRIuSIZE, key.iov_len));
prev_key = key;
}
if (!bad_data) {
if (!prev_data.iov_base &&
(sdb->flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)))
chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info),
"fixed data-size %" PRIuSIZE, data.iov_len));
prev_data = data;
}
record_count++;
histogram_acc(key.iov_len, &sdb->histogram.key_len);
histogram_acc(data.iov_len, &sdb->histogram.val_len);
const node_t *const node =
page_node(cursor->pg[cursor->top], cursor->ki[cursor->top]);
if (node_flags(node) == N_SUBDATA) {
if (dbi != MAIN_DBI || (sdb->flags & (MDBX_DUPSORT | MDBX_DUPFIXED |
MDBX_REVERSEDUP | MDBX_INTEGERDUP)))
chk_object_issue(scope, "entry", record_count,
"unexpected table", "node-flags 0x%x",
node_flags(node));
else if (data.iov_len != sizeof(tree_t))
chk_object_issue(scope, "entry", record_count,
"wrong table node size",
"node-size %" PRIuSIZE " != %" PRIuSIZE, data.iov_len,
sizeof(tree_t));
else if (scope->stage == MDBX_chk_maindb)
/* подсчитываем table при первом проходе */
sub_databases += 1;
else {
/* обработка table при втором проходе */
tree_t aligned_db;
memcpy(&aligned_db, data.iov_base, sizeof(aligned_db));
walk_sdb_t sdb_info = {.name = key};
sdb_info.internal = &aligned_db;
MDBX_chk_table_t *table;
err = chk_get_sdb(scope, &sdb_info, &table);
if (unlikely(err))
goto bailout;
if (table->cookie) {
err = chk_scope_begin(
chk, 0, MDBX_chk_tables, table, &usr->result.problems_kv,
"Processing table %s...", chk_v2a(chk, &table->name));
if (likely(!err)) {
err = chk_db(usr->scope, (MDBX_dbi)-1, table, chk_handle_kv);
if (err != MDBX_EINTR && err != MDBX_RESULT_TRUE)
usr->result.table_processed += 1;
}
err = chk_scope_restore(scope, err);
if (unlikely(err))
goto bailout;
} else
chk_line_end(chk_flush(
chk_print(chk_line_begin(scope, MDBX_chk_processing),
"Skip processing %s...", chk_v2a(chk, &table->name))));
}
} else if (handler) {
err = handler(scope, sdb, record_count, &key, &data);
if (unlikely(err))
goto bailout;
}
err = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT);
}
err = (err != MDBX_NOTFOUND) ? chk_error_rc(scope, err, "mdbx_cursor_get")
: MDBX_SUCCESS;
if (err == MDBX_SUCCESS && record_count != db->items)
chk_scope_issue(scope,
"different number of entries %" PRIuSIZE " != %" PRIu64,
record_count, db->items);
bailout:
if (cursor) {
if (handler) {
if (sdb->histogram.key_len.count) {
MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info);
line = histogram_dist(line, &sdb->histogram.key_len,
"key length density", "0/1", false);
chk_line_feed(line);
line = histogram_dist(line, &sdb->histogram.val_len,
"value length density", "0/1", false);
chk_line_end(line);
}
if (scope->stage == MDBX_chk_maindb)
usr->result.table_total = sub_databases;
if (chk->cb->table_conclude)
err = chk->cb->table_conclude(usr, sdb, cursor, err);
MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution);
line = chk_print(line, "summary: %" PRIuSIZE " records,", record_count);
if (dups || (sdb->flags & (MDBX_DUPSORT | MDBX_DUPFIXED |
MDBX_REVERSEDUP | MDBX_INTEGERDUP)))
line = chk_print(line, " %" PRIuSIZE " dups,", dups);
if (sub_databases || dbi == MAIN_DBI)
line = chk_print(line, " %" PRIuSIZE " tables,", sub_databases);
line = chk_print(line,
" %" PRIuSIZE " key's bytes,"
" %" PRIuSIZE " data's bytes,"
" %" PRIuSIZE " problem(s)",
sdb->histogram.key_len.amount,
sdb->histogram.val_len.amount, scope->subtotal_issues);
chk_line_end(chk_flush(line));
}
mdbx_cursor_close(cursor);
if (!txn->cursors[dbi] && (txn->dbi_state[dbi] & DBI_FRESH))
mdbx_dbi_close(env, dbi);
}
return err;
}
__cold static int chk_handle_gc(MDBX_chk_scope_t *const scope,
MDBX_chk_table_t *sdb,
const size_t record_number, const MDBX_val *key,
const MDBX_val *data) {
MDBX_chk_internal_t *const chk = scope->internal;
MDBX_chk_context_t *const usr = chk->usr;
assert(sdb == &chk->table_gc);
(void)sdb;
const char *bad = "";
pgno_t *iptr = data->iov_base;
if (key->iov_len != sizeof(txnid_t))
chk_object_issue(scope, "entry", record_number, "wrong txn-id size",
"key-size %" PRIuSIZE, key->iov_len);
else {
txnid_t txnid;
memcpy(&txnid, key->iov_base, sizeof(txnid));
if (txnid < 1 || txnid > usr->txn->txnid)
chk_object_issue(scope, "entry", record_number, "wrong txn-id",
"%" PRIaTXN, txnid);
else {
if (data->iov_len < sizeof(pgno_t) || data->iov_len % sizeof(pgno_t))
chk_object_issue(scope, "entry", txnid, "wrong idl size", "%" PRIuPTR,
data->iov_len);
size_t number = (data->iov_len >= sizeof(pgno_t)) ? *iptr++ : 0;
if (number > PAGELIST_LIMIT)
chk_object_issue(scope, "entry", txnid, "wrong idl length", "%" PRIuPTR,
number);
else if ((number + 1) * sizeof(pgno_t) > data->iov_len) {
chk_object_issue(scope, "entry", txnid, "trimmed idl",
"%" PRIuSIZE " > %" PRIuSIZE " (corruption)",
(number + 1) * sizeof(pgno_t), data->iov_len);
number = data->iov_len / sizeof(pgno_t) - 1;
} else if (data->iov_len - (number + 1) * sizeof(pgno_t) >=
/* LY: allow gap up to one page. it is ok
* and better than shink-and-retry inside gc_update() */
usr->env->ps)
chk_object_issue(scope, "entry", txnid, "extra idl space",
"%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)",
(number + 1) * sizeof(pgno_t), data->iov_len);
usr->result.gc_pages += number;
if (chk->envinfo.mi_latter_reader_txnid > txnid)
usr->result.reclaimable_pages += number;
size_t prev =
MDBX_PNL_ASCENDING ? NUM_METAS - 1 : usr->txn->geo.first_unallocated;
size_t span = 1;
for (size_t i = 0; i < number; ++i) {
const size_t pgno = iptr[i];
if (pgno < NUM_METAS)
chk_object_issue(scope, "entry", txnid, "wrong idl entry",
"pgno %" PRIuSIZE " < meta-pages %u", pgno,
NUM_METAS);
else if (pgno >= usr->result.backed_pages)
chk_object_issue(scope, "entry", txnid, "wrong idl entry",
"pgno %" PRIuSIZE " > backed-pages %" PRIuSIZE, pgno,
usr->result.backed_pages);
else if (pgno >= usr->result.alloc_pages)
chk_object_issue(scope, "entry", txnid, "wrong idl entry",
"pgno %" PRIuSIZE " > alloc-pages %" PRIuSIZE, pgno,
usr->result.alloc_pages - 1);
else {
if (MDBX_PNL_DISORDERED(prev, pgno)) {
bad = " [bad sequence]";
chk_object_issue(
scope, "entry", txnid, "bad sequence",
"%" PRIuSIZE " %c [%" PRIuSIZE "].%" PRIuSIZE, prev,
(prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? '>' : '<'), i,
pgno);
}
if (chk->pagemap) {
const intptr_t id = chk->pagemap[pgno];
if (id == 0)
chk->pagemap[pgno] = -1 /* mark the pgno listed in GC */;
else if (id > 0) {
assert(id - 1 <= (intptr_t)ARRAY_LENGTH(chk->table));
chk_object_issue(scope, "page", pgno, "already used", "by %s",
chk_v2a(chk, &chk->table[id - 1]->name));
} else
chk_object_issue(scope, "page", pgno, "already listed in GC",
nullptr);
}
}
prev = pgno;
while (i + span < number &&
iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span)
: pgno_sub(pgno, span)))
++span;
}
if (sdb->cookie) {
chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_details),
"transaction %" PRIaTXN ", %" PRIuSIZE
" pages, maxspan %" PRIuSIZE "%s",
txnid, number, span, bad));
for (size_t i = 0; i < number; i += span) {
const size_t pgno = iptr[i];
for (span = 1;
i + span < number &&
iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span)
: pgno_sub(pgno, span));
++span)
;
histogram_acc(span, &sdb->histogram.nested_tree);
MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra);
if (line) {
if (span > 1)
line =
chk_print(line, "%9" PRIuSIZE "[%" PRIuSIZE "]", pgno, span);
else
line = chk_print(line, "%9" PRIuSIZE, pgno);
chk_line_end(line);
int err = chk_check_break(scope);
if (err)
return err;
}
}
}
}
}
return chk_check_break(scope);
}
__cold static int env_chk(MDBX_chk_scope_t *const scope) {
MDBX_chk_internal_t *const chk = scope->internal;
MDBX_chk_context_t *const usr = chk->usr;
MDBX_env *const env = usr->env;
MDBX_txn *const txn = usr->txn;
int err =
env_info(env, txn, &chk->envinfo, sizeof(chk->envinfo), &chk->troika);
if (unlikely(err))
return chk_error_rc(scope, err, "env_info");
MDBX_chk_line_t *line =
chk_puts(chk_line_begin(scope, MDBX_chk_info -
(1 << MDBX_chk_severity_prio_shift)),
"dxb-id ");
if (chk->envinfo.mi_dxbid.x | chk->envinfo.mi_dxbid.y)
line = chk_print(line, "%016" PRIx64 "-%016" PRIx64,
chk->envinfo.mi_dxbid.x, chk->envinfo.mi_dxbid.y);
else
line = chk_puts(line, "is absent");
chk_line_end(line);
line = chk_puts(chk_line_begin(scope, MDBX_chk_info), "current boot-id ");
if (chk->envinfo.mi_bootid.current.x | chk->envinfo.mi_bootid.current.y)
line = chk_print(line, "%016" PRIx64 "-%016" PRIx64,
chk->envinfo.mi_bootid.current.x,
chk->envinfo.mi_bootid.current.y);
else
line = chk_puts(line, "is unavailable");
chk_line_end(line);
err = osal_filesize(env->lazy_fd, &env->dxb_mmap.filesize);
if (unlikely(err))
return chk_error_rc(scope, err, "osal_filesize");
//--------------------------------------------------------------------------
err = chk_scope_begin(chk, 1, MDBX_chk_meta, nullptr,
&usr->result.problems_meta, "Peek the meta-pages...");
if (likely(!err)) {
MDBX_chk_scope_t *const inner = usr->scope;
const uint64_t dxbfile_pages = env->dxb_mmap.filesize >> env->ps2ln;
usr->result.alloc_pages = txn->geo.first_unallocated;
usr->result.backed_pages = bytes2pgno(env, env->dxb_mmap.current);
if (unlikely(usr->result.backed_pages > dxbfile_pages))
chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64,
usr->result.backed_pages, dxbfile_pages);
if (unlikely(dxbfile_pages < NUM_METAS))
chk_scope_issue(inner, "file-pages %" PRIu64 " < %u", dxbfile_pages,
NUM_METAS);
if (unlikely(usr->result.backed_pages < NUM_METAS))
chk_scope_issue(inner, "backed-pages %zu < %u", usr->result.backed_pages,
NUM_METAS);
if (unlikely(usr->result.backed_pages < NUM_METAS)) {
chk_scope_issue(inner, "backed-pages %zu < num-metas %u",
usr->result.backed_pages, NUM_METAS);
return MDBX_CORRUPTED;
}
if (unlikely(dxbfile_pages < NUM_METAS)) {
chk_scope_issue(inner, "backed-pages %zu < num-metas %u",
usr->result.backed_pages, NUM_METAS);
return MDBX_CORRUPTED;
}
if (unlikely(usr->result.backed_pages > (size_t)MAX_PAGENO + 1)) {
chk_scope_issue(inner, "backed-pages %zu > max-pages %zu",
usr->result.backed_pages, (size_t)MAX_PAGENO + 1);
usr->result.backed_pages = MAX_PAGENO + 1;
}
if ((env->flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) {
if (unlikely(usr->result.backed_pages > dxbfile_pages)) {
chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64,
usr->result.backed_pages, dxbfile_pages);
usr->result.backed_pages = (size_t)dxbfile_pages;
}
if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) {
chk_scope_issue(scope, "alloc-pages %zu > backed-pages %zu",
usr->result.alloc_pages, usr->result.backed_pages);
usr->result.alloc_pages = usr->result.backed_pages;
}
} else {
/* DB may be shrunk by writer down to the allocated (but unused) pages. */
if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) {
chk_scope_issue(inner, "alloc-pages %zu > backed-pages %zu",
usr->result.alloc_pages, usr->result.backed_pages);
usr->result.alloc_pages = usr->result.backed_pages;
}
if (unlikely(usr->result.alloc_pages > dxbfile_pages)) {
chk_scope_issue(inner, "alloc-pages %zu > file-pages %" PRIu64,
usr->result.alloc_pages, dxbfile_pages);
usr->result.alloc_pages = (size_t)dxbfile_pages;
}
if (unlikely(usr->result.backed_pages > dxbfile_pages))
usr->result.backed_pages = (size_t)dxbfile_pages;
}
line = chk_line_feed(chk_print(
chk_line_begin(inner, MDBX_chk_info),
"pagesize %u (%u system), max keysize %u..%u"
", max readers %u",
env->ps, globals.sys_pagesize,
mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT),
mdbx_env_get_maxkeysize_ex(env, MDBX_DB_DEFAULTS), env->max_readers));
line = chk_line_feed(
chk_print_size(line, "mapsize ", env->dxb_mmap.current, nullptr));
if (txn->geo.lower == txn->geo.upper)
line = chk_print_size(
line, "fixed datafile: ", chk->envinfo.mi_geo.current, nullptr);
else {
line = chk_print_size(
line, "dynamic datafile: ", chk->envinfo.mi_geo.lower, nullptr);
line = chk_print_size(line, " .. ", chk->envinfo.mi_geo.upper, ", ");
line = chk_print_size(line, "+", chk->envinfo.mi_geo.grow, ", ");
line = chk_line_feed(
chk_print_size(line, "-", chk->envinfo.mi_geo.shrink, nullptr));
line = chk_print_size(
line, "current datafile: ", chk->envinfo.mi_geo.current, nullptr);
}
tASSERT(txn, txn->geo.now == chk->envinfo.mi_geo.current /
chk->envinfo.mi_dxb_pagesize);
chk_line_end(chk_print(line, ", %u pages", txn->geo.now));
#if defined(_WIN32) || defined(_WIN64) || MDBX_DEBUG
if (txn->geo.shrink_pv && txn->geo.now != txn->geo.upper &&
scope->verbosity >= MDBX_chk_verbose) {
line = chk_line_begin(inner, MDBX_chk_notice);
chk_line_feed(chk_print(
line, " > WARNING: Due Windows system limitations a file couldn't"));
chk_line_feed(chk_print(
line, " > be truncated while the database is opened. So, the size"));
chk_line_feed(chk_print(
line, " > database file of may by large than the database itself,"));
chk_line_end(chk_print(
line, " > until it will be closed or reopened in read-write mode."));
}
#endif /* Windows || Debug */
chk_verbose_meta(inner, 0);
chk_verbose_meta(inner, 1);
chk_verbose_meta(inner, 2);
if (env->stuck_meta >= 0) {
chk_line_end(chk_print(chk_line_begin(inner, MDBX_chk_processing),
"skip checking meta-pages since the %u"
" is selected for verification",
env->stuck_meta));
line = chk_line_feed(
chk_print(chk_line_begin(inner, MDBX_chk_resolution),
"transactions: recent %" PRIu64 ", "
"selected for verification %" PRIu64 ", lag %" PRIi64,
chk->envinfo.mi_recent_txnid,
chk->envinfo.mi_meta_txnid[env->stuck_meta],
chk->envinfo.mi_recent_txnid -
chk->envinfo.mi_meta_txnid[env->stuck_meta]));
chk_line_end(line);
} else {
chk_line_end(chk_puts(chk_line_begin(inner, MDBX_chk_verbose),
"performs check for meta-pages clashes"));
const unsigned meta_clash_mask = meta_eq_mask(&chk->troika);
if (meta_clash_mask & 1)
chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 0, 1);
if (meta_clash_mask & 2)
chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 1, 2);
if (meta_clash_mask & 4)
chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 2, 0);
const unsigned prefer_steady_metanum = chk->troika.prefer_steady;
const uint64_t prefer_steady_txnid =
chk->troika.txnid[prefer_steady_metanum];
const unsigned recent_metanum = chk->troika.recent;
const uint64_t recent_txnid = chk->troika.txnid[recent_metanum];
if (env->flags & MDBX_EXCLUSIVE) {
chk_line_end(
chk_puts(chk_line_begin(inner, MDBX_chk_verbose),
"performs full check recent-txn-id with meta-pages"));
eASSERT(env, recent_txnid == chk->envinfo.mi_recent_txnid);
if (prefer_steady_txnid != recent_txnid) {
if ((chk->flags & MDBX_CHK_READWRITE) != 0 &&
(env->flags & MDBX_RDONLY) == 0 &&
recent_txnid > prefer_steady_txnid &&
(chk->envinfo.mi_bootid.current.x |
chk->envinfo.mi_bootid.current.y) != 0 &&
chk->envinfo.mi_bootid.current.x ==
chk->envinfo.mi_bootid.meta[recent_metanum].x &&
chk->envinfo.mi_bootid.current.y ==
chk->envinfo.mi_bootid.meta[recent_metanum].y) {
chk_line_end(
chk_print(chk_line_begin(inner, MDBX_chk_verbose),
"recent meta-%u is weak, but boot-id match current"
" (will synced upon successful check)",
recent_metanum));
} else
chk_scope_issue(
inner,
"steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64
" != %" PRIi64 ")",
prefer_steady_metanum, prefer_steady_txnid, recent_txnid);
}
} else if (chk->write_locked) {
chk_line_end(
chk_puts(chk_line_begin(inner, MDBX_chk_verbose),
"performs lite check recent-txn-id with meta-pages (not a "
"monopolistic mode)"));
if (recent_txnid != chk->envinfo.mi_recent_txnid) {
chk_scope_issue(inner,
"weak meta-%d txn-id mismatch recent-txn-id (%" PRIi64
" != %" PRIi64 ")",
recent_metanum, recent_txnid,
chk->envinfo.mi_recent_txnid);
}
} else {
chk_line_end(chk_puts(
chk_line_begin(inner, MDBX_chk_verbose),
"skip check recent-txn-id with meta-pages (monopolistic or "
"read-write mode only)"));
}
chk_line_end(chk_print(
chk_line_begin(inner, MDBX_chk_resolution),
"transactions: recent %" PRIu64 ", latter reader %" PRIu64
", lag %" PRIi64,
chk->envinfo.mi_recent_txnid, chk->envinfo.mi_latter_reader_txnid,
chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid));
}
}
err = chk_scope_restore(scope, err);
//--------------------------------------------------------------------------
const char *const subj_tree = "B-Trees";
if (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL)
chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing),
"Skipping %s traversal...", subj_tree));
else {
err = chk_scope_begin(
chk, -1, MDBX_chk_tree, nullptr, &usr->result.tree_problems,
"Traversal %s by txn#%" PRIaTXN "...", subj_tree, txn->txnid);
if (likely(!err))
err = chk_tree(usr->scope);
if (usr->result.tree_problems && usr->result.gc_tree_problems == 0)
usr->result.gc_tree_problems = usr->result.tree_problems;
if (usr->result.tree_problems && usr->result.kv_tree_problems == 0)
usr->result.kv_tree_problems = usr->result.tree_problems;
chk_scope_restore(scope, err);
}
const char *const subj_gc = chk_v2a(chk, MDBX_CHK_GC);
if (usr->result.gc_tree_problems > 0)
chk_line_end(chk_print(
chk_line_begin(scope, MDBX_chk_processing),
"Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))",
subj_gc, subj_tree,
usr->result.problems_gc = usr->result.gc_tree_problems));
else {
err = chk_scope_begin(
chk, -1, MDBX_chk_gc, &chk->table_gc, &usr->result.problems_gc,
"Processing %s by txn#%" PRIaTXN "...", subj_gc, txn->txnid);
if (likely(!err))
err = chk_db(usr->scope, FREE_DBI, &chk->table_gc, chk_handle_gc);
line = chk_line_begin(scope, MDBX_chk_info);
if (line) {
histogram_print(scope, line, &chk->table_gc.histogram.nested_tree,
"span(s)", "single", false);
chk_line_end(line);
}
if (usr->result.problems_gc == 0 &&
(chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) {
const size_t used_pages = usr->result.alloc_pages - usr->result.gc_pages;
if (usr->result.processed_pages != used_pages)
chk_scope_issue(usr->scope,
"used pages mismatch (%" PRIuSIZE
"(walked) != %" PRIuSIZE "(allocated - GC))",
usr->result.processed_pages, used_pages);
if (usr->result.unused_pages != usr->result.gc_pages)
chk_scope_issue(usr->scope,
"GC pages mismatch (%" PRIuSIZE
"(expected) != %" PRIuSIZE "(GC))",
usr->result.unused_pages, usr->result.gc_pages);
}
}
chk_scope_restore(scope, err);
//--------------------------------------------------------------------------
err = chk_scope_begin(chk, 1, MDBX_chk_space, nullptr, nullptr,
"Page allocation:");
const double percent_boundary_reciprocal = 100.0 / txn->geo.upper;
const double percent_backed_reciprocal = 100.0 / usr->result.backed_pages;
const size_t detained = usr->result.gc_pages - usr->result.reclaimable_pages;
const size_t available2boundary =
txn->geo.upper - usr->result.alloc_pages + usr->result.reclaimable_pages;
const size_t available2backed = usr->result.backed_pages -
usr->result.alloc_pages +
usr->result.reclaimable_pages;
const size_t remained2boundary = txn->geo.upper - usr->result.alloc_pages;
const size_t remained2backed =
usr->result.backed_pages - usr->result.alloc_pages;
const size_t used = (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL)
? usr->result.alloc_pages - usr->result.gc_pages
: usr->result.processed_pages;
line = chk_line_begin(usr->scope, MDBX_chk_info);
line = chk_print(line,
"backed by file: %" PRIuSIZE " pages (%.1f%%)"
", %" PRIuSIZE " left to boundary (%.1f%%)",
usr->result.backed_pages,
usr->result.backed_pages * percent_boundary_reciprocal,
txn->geo.upper - usr->result.backed_pages,
(txn->geo.upper - usr->result.backed_pages) *
percent_boundary_reciprocal);
line = chk_line_feed(line);
line = chk_print(
line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary",
"used", used, used * percent_backed_reciprocal,
used * percent_boundary_reciprocal);
line = chk_line_feed(line);
line = chk_print(
line,
"%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE
" to boundary (%.1f%% of boundary)",
"remained", remained2backed, remained2backed * percent_backed_reciprocal,
remained2boundary, remained2boundary * percent_boundary_reciprocal);
line = chk_line_feed(line);
line = chk_print(
line,
"reclaimable: %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)"
", GC %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)",
usr->result.reclaimable_pages,
usr->result.reclaimable_pages * percent_backed_reciprocal,
usr->result.reclaimable_pages * percent_boundary_reciprocal,
usr->result.gc_pages, usr->result.gc_pages * percent_backed_reciprocal,
usr->result.gc_pages * percent_boundary_reciprocal);
line = chk_line_feed(line);
line = chk_print(
line,
"detained by reader(s): %" PRIuSIZE
" (%.1f%% of backed, %.1f%% of boundary)"
", %u reader(s), lag %" PRIi64,
detained, detained * percent_backed_reciprocal,
detained * percent_boundary_reciprocal, chk->envinfo.mi_numreaders,
chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid);
line = chk_line_feed(line);
line = chk_print(
line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary",
"allocated", usr->result.alloc_pages,
usr->result.alloc_pages * percent_backed_reciprocal,
usr->result.alloc_pages * percent_boundary_reciprocal);
line = chk_line_feed(line);
line = chk_print(line,
"%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE
" to boundary (%.1f%% of boundary)",
"available", available2backed,
available2backed * percent_backed_reciprocal,
available2boundary,
available2boundary * percent_boundary_reciprocal);
chk_line_end(line);
line = chk_line_begin(usr->scope, MDBX_chk_resolution);
line = chk_print(line, "%s %" PRIaPGNO " pages",
(txn->geo.upper == txn->geo.now) ? "total" : "upto",
txn->geo.upper);
line = chk_print(line, ", backed %" PRIuSIZE " (%.1f%%)",
usr->result.backed_pages,
usr->result.backed_pages * percent_boundary_reciprocal);
line = chk_print(line, ", allocated %" PRIuSIZE " (%.1f%%)",
usr->result.alloc_pages,
usr->result.alloc_pages * percent_boundary_reciprocal);
line =
chk_print(line, ", available %" PRIuSIZE " (%.1f%%)", available2boundary,
available2boundary * percent_boundary_reciprocal);
chk_line_end(line);
chk_scope_restore(scope, err);
//--------------------------------------------------------------------------
const char *const subj_main = chk_v2a(chk, MDBX_CHK_MAIN);
if (chk->flags & MDBX_CHK_SKIP_KV_TRAVERSAL)
chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing),
"Skip processing %s...", subj_main));
else if ((usr->result.problems_kv = usr->result.kv_tree_problems) > 0)
chk_line_end(chk_print(
chk_line_begin(scope, MDBX_chk_processing),
"Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))",
subj_main, subj_tree,
usr->result.problems_kv = usr->result.kv_tree_problems));
else {
err = chk_scope_begin(chk, 0, MDBX_chk_maindb, &chk->table_main,
&usr->result.problems_kv, "Processing %s...",
subj_main);
if (likely(!err))
err = chk_db(usr->scope, MAIN_DBI, &chk->table_main, chk_handle_kv);
chk_scope_restore(scope, err);
const char *const subj_tables = "table(s)";
if (usr->result.problems_kv && usr->result.table_total)
chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing),
"Skip processing %s", subj_tables));
else if (usr->result.problems_kv == 0 && usr->result.table_total == 0)
chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "No %s",
subj_tables));
else if (usr->result.problems_kv == 0 && usr->result.table_total) {
err = chk_scope_begin(
chk, 1, MDBX_chk_tables, nullptr, &usr->result.problems_kv,
"Processing %s by txn#%" PRIaTXN "...", subj_tables, txn->txnid);
if (!err)
err = chk_db(usr->scope, MAIN_DBI, &chk->table_main, nullptr);
if (usr->scope->subtotal_issues)
chk_line_end(chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution),
"processed %" PRIuSIZE " of %" PRIuSIZE
" %s, %" PRIuSIZE " problems(s)",
usr->result.table_processed,
usr->result.table_total, subj_tables,
usr->scope->subtotal_issues));
}
chk_scope_restore(scope, err);
}
return chk_scope_end(chk, chk_scope_begin(chk, 0, MDBX_chk_conclude, nullptr,
nullptr, nullptr));
}
__cold int mdbx_env_chk_encount_problem(MDBX_chk_context_t *ctx) {
if (likely(ctx && ctx->internal && ctx->internal->usr == ctx &&
ctx->internal->problem_counter && ctx->scope)) {
*ctx->internal->problem_counter += 1;
ctx->scope->subtotal_issues += 1;
return MDBX_SUCCESS;
}
return MDBX_EINVAL;
}
__cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks *cb,
MDBX_chk_context_t *ctx, const MDBX_chk_flags_t flags,
MDBX_chk_severity_t verbosity,
unsigned timeout_seconds_16dot16) {
int err, rc = check_env(env, false);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
if (unlikely(!cb || !ctx || ctx->internal))
return MDBX_EINVAL;
MDBX_chk_internal_t *const chk = osal_calloc(1, sizeof(MDBX_chk_internal_t));
if (unlikely(!chk))
return MDBX_ENOMEM;
chk->cb = cb;
chk->usr = ctx;
chk->usr->internal = chk;
chk->usr->env = env;
chk->flags = flags;
chk->table_gc.id = -1;
chk->table_gc.name.iov_base = MDBX_CHK_GC;
chk->table[FREE_DBI] = &chk->table_gc;
chk->table_main.id = -1;
chk->table_main.name.iov_base = MDBX_CHK_MAIN;
chk->table[MAIN_DBI] = &chk->table_main;
chk->monotime_timeout =
timeout_seconds_16dot16
? osal_16dot16_to_monotime(timeout_seconds_16dot16) + osal_monotime()
: 0;
chk->usr->scope_nesting = 0;
chk->usr->result.tables = (const void *)&chk->table;
MDBX_chk_scope_t *const top = chk->scope_stack;
top->verbosity = verbosity;
top->internal = chk;
// init
rc = chk_scope_end(
chk, chk_scope_begin(chk, 0, MDBX_chk_init, nullptr, nullptr, nullptr));
// lock
if (likely(!rc))
rc = chk_scope_begin(
chk, 0, MDBX_chk_lock, nullptr, nullptr, "Taking %slock...",
(env->flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) ? "" : "read ");
if (likely(!rc) && (env->flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0 &&
(flags & MDBX_CHK_READWRITE)) {
rc = mdbx_txn_lock(env, false);
if (unlikely(rc))
chk_error_rc(ctx->scope, rc, "mdbx_txn_lock");
else
chk->write_locked = true;
}
if (likely(!rc)) {
rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &ctx->txn);
if (unlikely(rc))
chk_error_rc(ctx->scope, rc, "mdbx_txn_begin");
}
chk_scope_end(chk, rc);
// doit
if (likely(!rc)) {
chk->table_gc.flags = ctx->txn->dbs[FREE_DBI].flags;
chk->table_main.flags = ctx->txn->dbs[MAIN_DBI].flags;
rc = env_chk(top);
}
// unlock
if (ctx->txn || chk->write_locked) {
chk_scope_begin(chk, 0, MDBX_chk_unlock, nullptr, nullptr, nullptr);
if (ctx->txn) {
err = mdbx_txn_abort(ctx->txn);
if (err && !rc)
rc = err;
ctx->txn = nullptr;
}
if (chk->write_locked)
mdbx_txn_unlock(env);
rc = chk_scope_end(chk, rc);
}
// finalize
err = chk_scope_begin(chk, 0, MDBX_chk_finalize, nullptr, nullptr, nullptr);
rc = chk_scope_end(chk, err ? err : rc);
chk_dispose(chk);
return rc;
}