From 5c1745a7cd5445f52b053265ac5fc64329b80a84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 16 Mar 2025 20:08:54 +0300 Subject: [PATCH] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B3=D0=B8=D1=81=D1=82=D0=BE=D0=B3?= =?UTF-8?q?=D1=80=D0=B0=D0=BC=D0=BC=D1=8B=20=D0=BA=D0=BE=D0=BB=D0=B8=D1=87?= =?UTF-8?q?=D0=B5=D1=81=D1=82=D0=B2=D0=B0=20multi-=D0=B7=D0=BD=D0=B0=D1=87?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B9/=D0=B4=D1=83=D0=B1=D0=BB=D0=B8=D0=BA?= =?UTF-8?q?=D0=B0=D1=82=D0=BE=D0=B2=20=D0=B2=20chk.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 2 ++ src/chk.c | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/mdbx.h b/mdbx.h index fac194de..c43c0c46 100644 --- a/mdbx.h +++ b/mdbx.h @@ -6541,6 +6541,8 @@ typedef struct MDBX_chk_table { struct MDBX_chk_histogram key_len; /// Values length histogram struct MDBX_chk_histogram val_len; + /// Number of multi-values (aka duplicates) histogram + struct MDBX_chk_histogram multival; } histogram; } MDBX_chk_table_t; diff --git a/src/chk.c b/src/chk.c index 9c83795c..7ea451c7 100644 --- a/src/chk.c +++ b/src/chk.c @@ -1139,6 +1139,7 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, MDBX_chk_t const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, tbl->flags); MDBX_val prev_key = {nullptr, 0}, prev_data = {nullptr, 0}; MDBX_val key, data; + size_t dups_count = 0; err = mdbx_cursor_get(cursor, &key, &data, MDBX_FIRST); while (err == MDBX_SUCCESS) { err = chk_check_break(scope); @@ -1162,6 +1163,12 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, MDBX_chk_t } if (prev_key.iov_base) { + if (key.iov_base == prev_key.iov_base) + dups_count += 1; + else { + histogram_acc(dups_count, &tbl->histogram.multival); + dups_count = 0; + } if (prev_data.iov_base && !bad_data && (tbl->flags & MDBX_DUPFIXED) && prev_data.iov_len != data.iov_len) { chk_object_issue(scope, "entry", record_count, "different data length", "%" PRIuPTR " != %" PRIuPTR, prev_data.iov_len, data.iov_len); @@ -1248,17 +1255,27 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, MDBX_chk_t err = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT); } + if (prev_key.iov_base) + histogram_acc(dups_count, &tbl->histogram.multival); + err = (err != MDBX_NOTFOUND) ? chk_error_rc(scope, err, "mdbx_cursor_get") : MDBX_SUCCESS; if (err == MDBX_SUCCESS && record_count != db->items) chk_scope_issue(scope, "different number of entries %" PRIuSIZE " != %" PRIu64, record_count, db->items); bailout: if (cursor) { if (handler) { - if (tbl->histogram.key_len.count) { + if (record_count) { MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info); line = histogram_dist(line, &tbl->histogram.key_len, "key length density", "0/1", false); chk_line_feed(line); line = histogram_dist(line, &tbl->histogram.val_len, "value length density", "0/1", false); + if (tbl->histogram.multival.amount) { + chk_line_feed(line); + line = histogram_dist(line, &tbl->histogram.multival, "number of multi-values density", "single", false); + chk_line_feed(line); + line = chk_print(line, "number of keys %" PRIuSIZE ", average values per key %.1f", + tbl->histogram.multival.count, record_count / (double)tbl->histogram.multival.count); + } chk_line_end(line); } if (scope->stage == MDBX_chk_maindb)