mirror of
https://github.com/isar/libmdbx.git
synced 2025-11-07 07:18:56 +08:00
mdbx: изменение лицензии и реструктуризация исходного кода.
This commit is contained in:
314
src/walk.c
Normal file
314
src/walk.c
Normal file
@@ -0,0 +1,314 @@
|
||||
/// \copyright SPDX-License-Identifier: Apache-2.0
|
||||
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2024
|
||||
|
||||
#include "internals.h"
|
||||
|
||||
typedef struct walk_ctx {
|
||||
void *userctx;
|
||||
walk_options_t options;
|
||||
int deep;
|
||||
walk_func *visitor;
|
||||
MDBX_txn *txn;
|
||||
MDBX_cursor *cursor;
|
||||
} walk_ctx_t;
|
||||
|
||||
__cold static int walk_sdb(walk_ctx_t *ctx, walk_sdb_t *sdb);
|
||||
|
||||
static page_type_t walk_page_type(const page_t *mp) {
|
||||
if (mp)
|
||||
switch (mp->flags & ~P_SPILLED) {
|
||||
case P_BRANCH:
|
||||
return page_branch;
|
||||
case P_LEAF:
|
||||
return page_leaf;
|
||||
case P_LEAF | P_DUPFIX:
|
||||
return page_dupfix_leaf;
|
||||
case P_LARGE:
|
||||
return page_large;
|
||||
}
|
||||
return page_broken;
|
||||
}
|
||||
|
||||
static page_type_t walk_subpage_type(const page_t *sp) {
|
||||
switch (sp->flags & /* ignore legacy P_DIRTY flag */ ~P_LEGACY_DIRTY) {
|
||||
case P_LEAF | P_SUBP:
|
||||
return page_sub_leaf;
|
||||
case P_LEAF | P_DUPFIX | P_SUBP:
|
||||
return page_sub_dupfix_leaf;
|
||||
default:
|
||||
return page_sub_broken;
|
||||
}
|
||||
}
|
||||
|
||||
/* Depth-first tree traversal. */
|
||||
__cold static int walk_pgno(walk_ctx_t *ctx, walk_sdb_t *sdb, const pgno_t pgno,
|
||||
txnid_t parent_txnid) {
|
||||
assert(pgno != P_INVALID);
|
||||
page_t *mp = nullptr;
|
||||
int err = page_get(ctx->cursor, pgno, &mp, parent_txnid);
|
||||
|
||||
const page_type_t type = walk_page_type(mp);
|
||||
const size_t nentries = mp ? page_numkeys(mp) : 0;
|
||||
size_t header_size =
|
||||
(mp && !is_dupfix_leaf(mp)) ? PAGEHDRSZ + mp->lower : PAGEHDRSZ;
|
||||
size_t payload_size = 0;
|
||||
size_t unused_size =
|
||||
(mp ? page_room(mp) : ctx->txn->env->ps - header_size) - payload_size;
|
||||
size_t align_bytes = 0;
|
||||
|
||||
for (size_t i = 0; err == MDBX_SUCCESS && i < nentries; ++i) {
|
||||
if (type == page_dupfix_leaf) {
|
||||
/* DUPFIX pages have no entries[] or node headers */
|
||||
payload_size += mp->dupfix_ksize;
|
||||
continue;
|
||||
}
|
||||
|
||||
const node_t *node = page_node(mp, i);
|
||||
header_size += NODESIZE;
|
||||
const size_t node_key_size = node_ks(node);
|
||||
payload_size += node_key_size;
|
||||
|
||||
if (type == page_branch) {
|
||||
assert(i > 0 || node_ks(node) == 0);
|
||||
align_bytes += node_key_size & 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
const size_t node_data_size = node_ds(node);
|
||||
assert(type == page_leaf);
|
||||
switch (node_flags(node)) {
|
||||
case 0 /* usual node */:
|
||||
payload_size += node_data_size;
|
||||
align_bytes += (node_key_size + node_data_size) & 1;
|
||||
break;
|
||||
|
||||
case N_BIGDATA /* long data on the large/overflow page */: {
|
||||
const pgno_t large_pgno = node_largedata_pgno(node);
|
||||
const size_t over_payload = node_data_size;
|
||||
const size_t over_header = PAGEHDRSZ;
|
||||
|
||||
assert(err == MDBX_SUCCESS);
|
||||
pgr_t lp = page_get_large(ctx->cursor, large_pgno, mp->txnid);
|
||||
const size_t npages =
|
||||
((err = lp.err) == MDBX_SUCCESS) ? lp.page->pages : 1;
|
||||
const size_t pagesize = pgno2bytes(ctx->txn->env, npages);
|
||||
const size_t over_unused = pagesize - over_payload - over_header;
|
||||
const int rc = ctx->visitor(large_pgno, npages, ctx->userctx, ctx->deep,
|
||||
sdb, pagesize, page_large, err, 1,
|
||||
over_payload, over_header, over_unused);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc;
|
||||
payload_size += sizeof(pgno_t);
|
||||
align_bytes += node_key_size & 1;
|
||||
} break;
|
||||
|
||||
case N_SUBDATA /* sub-db */: {
|
||||
if (unlikely(node_data_size != sizeof(tree_t))) {
|
||||
ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED,
|
||||
"invalid subDb node size", (unsigned)node_data_size);
|
||||
assert(err == MDBX_CORRUPTED);
|
||||
err = MDBX_CORRUPTED;
|
||||
}
|
||||
header_size += node_data_size;
|
||||
align_bytes += (node_key_size + node_data_size) & 1;
|
||||
} break;
|
||||
|
||||
case N_SUBDATA | N_DUPDATA /* dupsorted sub-tree */:
|
||||
if (unlikely(node_data_size != sizeof(tree_t))) {
|
||||
ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED,
|
||||
"invalid sub-tree node size", (unsigned)node_data_size);
|
||||
assert(err == MDBX_CORRUPTED);
|
||||
err = MDBX_CORRUPTED;
|
||||
}
|
||||
header_size += node_data_size;
|
||||
align_bytes += (node_key_size + node_data_size) & 1;
|
||||
break;
|
||||
|
||||
case N_DUPDATA /* short sub-page */: {
|
||||
if (unlikely(node_data_size <= PAGEHDRSZ || (node_data_size & 1))) {
|
||||
ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED,
|
||||
"invalid sub-page node size", (unsigned)node_data_size);
|
||||
assert(err == MDBX_CORRUPTED);
|
||||
err = MDBX_CORRUPTED;
|
||||
break;
|
||||
}
|
||||
|
||||
const page_t *const sp = node_data(node);
|
||||
const page_type_t subtype = walk_subpage_type(sp);
|
||||
const size_t nsubkeys = page_numkeys(sp);
|
||||
if (unlikely(subtype == page_sub_broken)) {
|
||||
ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED,
|
||||
"invalid sub-page flags", sp->flags);
|
||||
assert(err == MDBX_CORRUPTED);
|
||||
err = MDBX_CORRUPTED;
|
||||
}
|
||||
|
||||
size_t subheader_size =
|
||||
is_dupfix_leaf(sp) ? PAGEHDRSZ : PAGEHDRSZ + sp->lower;
|
||||
size_t subunused_size = page_room(sp);
|
||||
size_t subpayload_size = 0;
|
||||
size_t subalign_bytes = 0;
|
||||
|
||||
for (size_t ii = 0; err == MDBX_SUCCESS && ii < nsubkeys; ++ii) {
|
||||
if (subtype == page_sub_dupfix_leaf) {
|
||||
/* DUPFIX pages have no entries[] or node headers */
|
||||
subpayload_size += sp->dupfix_ksize;
|
||||
} else {
|
||||
assert(subtype == page_sub_leaf);
|
||||
const node_t *subnode = page_node(sp, ii);
|
||||
const size_t subnode_size = node_ks(subnode) + node_ds(subnode);
|
||||
subheader_size += NODESIZE;
|
||||
subpayload_size += subnode_size;
|
||||
subalign_bytes += subnode_size & 1;
|
||||
if (unlikely(node_flags(subnode) != 0)) {
|
||||
ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED,
|
||||
"unexpected sub-node flags", node_flags(subnode));
|
||||
assert(err == MDBX_CORRUPTED);
|
||||
err = MDBX_CORRUPTED;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const int rc =
|
||||
ctx->visitor(pgno, 0, ctx->userctx, ctx->deep + 1, sdb,
|
||||
node_data_size, subtype, err, nsubkeys, subpayload_size,
|
||||
subheader_size, subunused_size + subalign_bytes);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc;
|
||||
header_size += subheader_size;
|
||||
unused_size += subunused_size;
|
||||
payload_size += subpayload_size;
|
||||
align_bytes += subalign_bytes + (node_key_size & 1);
|
||||
} break;
|
||||
|
||||
default:
|
||||
ERROR("%s/%d: %s 0x%x", "MDBX_CORRUPTED", MDBX_CORRUPTED,
|
||||
"invalid node flags", node_flags(node));
|
||||
assert(err == MDBX_CORRUPTED);
|
||||
err = MDBX_CORRUPTED;
|
||||
}
|
||||
}
|
||||
|
||||
const int rc = ctx->visitor(
|
||||
pgno, 1, ctx->userctx, ctx->deep, sdb, ctx->txn->env->ps, type, err,
|
||||
nentries, payload_size, header_size, unused_size + align_bytes);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc;
|
||||
|
||||
for (size_t i = 0; err == MDBX_SUCCESS && i < nentries; ++i) {
|
||||
if (type == page_dupfix_leaf)
|
||||
continue;
|
||||
|
||||
node_t *node = page_node(mp, i);
|
||||
if (type == page_branch) {
|
||||
assert(err == MDBX_SUCCESS);
|
||||
ctx->deep += 1;
|
||||
err = walk_pgno(ctx, sdb, node_pgno(node), mp->txnid);
|
||||
ctx->deep -= 1;
|
||||
if (unlikely(err != MDBX_SUCCESS)) {
|
||||
if (err == MDBX_RESULT_TRUE)
|
||||
break;
|
||||
return err;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(type == page_leaf);
|
||||
switch (node_flags(node)) {
|
||||
default:
|
||||
continue;
|
||||
|
||||
case N_SUBDATA /* sub-db */:
|
||||
if (unlikely(node_ds(node) != sizeof(tree_t))) {
|
||||
ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED,
|
||||
"invalid sub-tree node size", (unsigned)node_ds(node));
|
||||
assert(err == MDBX_CORRUPTED);
|
||||
err = MDBX_CORRUPTED;
|
||||
} else {
|
||||
tree_t aligned_db;
|
||||
memcpy(&aligned_db, node_data(node), sizeof(aligned_db));
|
||||
walk_sdb_t subdb = {{node_key(node), node_ks(node)}, nullptr, nullptr};
|
||||
subdb.internal = &aligned_db;
|
||||
assert(err == MDBX_SUCCESS);
|
||||
ctx->deep += 1;
|
||||
err = walk_sdb(ctx, &subdb);
|
||||
ctx->deep -= 1;
|
||||
}
|
||||
break;
|
||||
|
||||
case N_SUBDATA | N_DUPDATA /* dupsorted sub-tree */:
|
||||
if (unlikely(node_ds(node) != sizeof(tree_t))) {
|
||||
ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED,
|
||||
"invalid dupsort sub-tree node size", (unsigned)node_ds(node));
|
||||
assert(err == MDBX_CORRUPTED);
|
||||
err = MDBX_CORRUPTED;
|
||||
} else {
|
||||
tree_t aligned_db;
|
||||
memcpy(&aligned_db, node_data(node), sizeof(aligned_db));
|
||||
assert(err == MDBX_SUCCESS);
|
||||
err = cursor_dupsort_setup(ctx->cursor, node, mp);
|
||||
if (likely(err == MDBX_SUCCESS)) {
|
||||
assert(ctx->cursor->subcur ==
|
||||
&container_of(ctx->cursor, cursor_couple_t, outer)->inner);
|
||||
ctx->cursor = &ctx->cursor->subcur->cursor;
|
||||
ctx->deep += 1;
|
||||
sdb->nested = &aligned_db;
|
||||
err = walk_pgno(ctx, sdb, aligned_db.root, mp->txnid);
|
||||
sdb->nested = nullptr;
|
||||
ctx->deep -= 1;
|
||||
subcur_t *inner_xcursor = container_of(ctx->cursor, subcur_t, cursor);
|
||||
cursor_couple_t *couple =
|
||||
container_of(inner_xcursor, cursor_couple_t, inner);
|
||||
ctx->cursor = &couple->outer;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
__cold static int walk_sdb(walk_ctx_t *ctx, walk_sdb_t *sdb) {
|
||||
tree_t *const db = sdb->internal;
|
||||
if (unlikely(db->root == P_INVALID))
|
||||
return MDBX_SUCCESS; /* empty db */
|
||||
|
||||
kvx_t kvx = {.clc = {.k = {.lmin = INT_MAX}, .v = {.lmin = INT_MAX}}};
|
||||
cursor_couple_t couple;
|
||||
int rc = cursor_init4walk(&couple, ctx->txn, db, &kvx);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
|
||||
const uint8_t cursor_checking = (ctx->options & dont_check_keys_ordering)
|
||||
? z_pagecheck | z_ignord
|
||||
: z_pagecheck;
|
||||
couple.outer.checking |= cursor_checking;
|
||||
couple.inner.cursor.checking |= cursor_checking;
|
||||
couple.outer.next = ctx->cursor;
|
||||
couple.outer.top_and_flags = z_disable_tree_search_fastpath;
|
||||
ctx->cursor = &couple.outer;
|
||||
rc = walk_pgno(ctx, sdb, db->root,
|
||||
db->mod_txnid ? db->mod_txnid : ctx->txn->txnid);
|
||||
ctx->cursor = couple.outer.next;
|
||||
return rc;
|
||||
}
|
||||
|
||||
__cold int walk_pages(MDBX_txn *txn, walk_func *visitor, void *user,
|
||||
walk_options_t options) {
|
||||
int rc = check_txn(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
|
||||
walk_ctx_t ctx = {
|
||||
.txn = txn, .userctx = user, .visitor = visitor, .options = options};
|
||||
walk_sdb_t sdb = {.name = {.iov_base = MDBX_CHK_GC},
|
||||
.internal = &txn->dbs[FREE_DBI]};
|
||||
rc = walk_sdb(&ctx, &sdb);
|
||||
if (!MDBX_IS_ERROR(rc)) {
|
||||
sdb.name.iov_base = MDBX_CHK_MAIN;
|
||||
sdb.internal = &txn->dbs[MAIN_DBI];
|
||||
rc = walk_sdb(&ctx, &sdb);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
Reference in New Issue
Block a user