mdbx: rework internals for using C99 flexible array member (preparation for -fsanitize=undefined).

Change-Id: I0d1836d6108ef379c43231720ef703ff69fc426d
This commit is contained in:
Leonid Yuriev 2019-10-22 22:31:06 +03:00
parent 70350bad81
commit b7d27c1b36
2 changed files with 193 additions and 213 deletions

View File

@ -129,6 +129,14 @@ static __pure_function __inline void *page_data(MDBX_page *mp) {
return mp->mp_ptrs; return mp->mp_ptrs;
} }
/* Map a pointer to a page's data area back to the page that contains it.
 * This is the counterpart of page_data(), which returns mp->mp_ptrs: `data`
 * must be the address of the mp_ptrs member of an MDBX_page (callers pass a
 * pointer to the MDBX_meta stored in a meta-page's data area in order to
 * reach the page header, e.g. its mp_pgno).
 * NOTE(review): container_of() is defined outside this view; presumably the
 * usual offsetof-based pointer adjustment -- confirm. */
static __pure_function __inline const MDBX_page *data_page(const void *data) {
return container_of(data, MDBX_page, mp_ptrs);
}
/* View the data area of a page as an MDBX_meta record.
 * This only casts the pointer returned by page_data(mp); no check is made
 * here that the page really is a meta-page (i.e. that mp_flags is P_META),
 * so that is left to the caller. */
static __pure_function __inline MDBX_meta *page_meta(MDBX_page *mp) {
return (MDBX_meta *)page_data(mp);
}
/* Number of nodes on a page */ /* Number of nodes on a page */
static __pure_function __inline unsigned page_numkeys(const MDBX_page *mp) { static __pure_function __inline unsigned page_numkeys(const MDBX_page *mp) {
return mp->mp_lower >> 1; return mp->mp_lower >> 1;
@ -2158,7 +2166,7 @@ static __maybe_unused void mdbx_page_list(MDBX_page *mp) {
return; return;
case P_META: case P_META:
mdbx_verbose("Meta-page %" PRIaPGNO " txnid %" PRIu64 "\n", pgno, mdbx_verbose("Meta-page %" PRIaPGNO " txnid %" PRIu64 "\n", pgno,
((MDBX_meta *)page_data(mp))->mm_txnid_a.inconsistent); page_meta(mp)->mm_txnid_a.inconsistent);
return; return;
default: default:
mdbx_verbose("Bad page %" PRIaPGNO " flags 0x%X\n", pgno, mp->mp_flags); mdbx_verbose("Bad page %" PRIaPGNO " flags 0x%X\n", pgno, mp->mp_flags);
@ -2621,8 +2629,10 @@ static __hot int mdbx_page_loose(MDBX_txn *txn, MDBX_page *mp) {
mdbx_kill_page(txn->mt_env, mp, pgno, npages); mdbx_kill_page(txn->mt_env, mp, pgno, npages);
VALGRIND_MAKE_MEM_UNDEFINED(mp, PAGEHDRSZ); VALGRIND_MAKE_MEM_UNDEFINED(mp, PAGEHDRSZ);
} }
VALGRIND_MAKE_MEM_NOACCESS(&mp->mp_data, txn->mt_env->me_psize - PAGEHDRSZ); VALGRIND_MAKE_MEM_NOACCESS(page_data(mp),
ASAN_POISON_MEMORY_REGION(&mp->mp_data, txn->mt_env->me_psize - PAGEHDRSZ); pgno2bytes(txn->mt_env, npages) - PAGEHDRSZ);
ASAN_POISON_MEMORY_REGION(page_data(mp),
pgno2bytes(txn->mt_env, npages) - PAGEHDRSZ);
if (unlikely(npages > if (unlikely(npages >
1 /* overflow pages doesn't comes to the loose-list */)) { 1 /* overflow pages doesn't comes to the loose-list */)) {
@ -2919,8 +2929,7 @@ bailout:
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
#define METAPAGE(env, n) (&pgno2page(env, n)->mp_meta) #define METAPAGE(env, n) page_meta(pgno2page(env, n))
#define METAPAGE_END(env) METAPAGE(env, NUM_METAS) #define METAPAGE_END(env) METAPAGE(env, NUM_METAS)
static __inline txnid_t meta_txnid(const MDBX_env *env, const MDBX_meta *meta, static __inline txnid_t meta_txnid(const MDBX_env *env, const MDBX_meta *meta,
@ -3393,10 +3402,8 @@ static int mdbx_page_alloc(MDBX_cursor *mc, unsigned num, MDBX_page **mp,
mdbx_debug("db %d use loose page %" PRIaPGNO, DDBI(mc), np->mp_pgno); mdbx_debug("db %d use loose page %" PRIaPGNO, DDBI(mc), np->mp_pgno);
mdbx_tassert(txn, np->mp_pgno < txn->mt_next_pgno); mdbx_tassert(txn, np->mp_pgno < txn->mt_next_pgno);
mdbx_ensure(env, np->mp_pgno >= NUM_METAS); mdbx_ensure(env, np->mp_pgno >= NUM_METAS);
VALGRIND_MAKE_MEM_UNDEFINED(&np->mp_data, VALGRIND_MAKE_MEM_UNDEFINED(page_data(np), page_space(txn->mt_env));
txn->mt_env->me_psize - PAGEHDRSZ); ASAN_UNPOISON_MEMORY_REGION(page_data(np), page_space(txn->mt_env));
ASAN_UNPOISON_MEMORY_REGION(&np->mp_data,
txn->mt_env->me_psize - PAGEHDRSZ);
*mp = np; *mp = np;
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
@ -4054,8 +4061,8 @@ __cold int mdbx_env_sync_ex(MDBX_env *env, int force, int nonblock) {
if (!META_IS_STEADY(head) || if (!META_IS_STEADY(head) ||
((flags & (MDBX_NOSYNC | MDBX_MAPASYNC)) == 0 && unsynced_pages)) { ((flags & (MDBX_NOSYNC | MDBX_MAPASYNC)) == 0 && unsynced_pages)) {
mdbx_debug("meta-head %" PRIaPGNO ", %s, sync_pending %" PRIaPGNO, mdbx_debug("meta-head %" PRIaPGNO ", %s, sync_pending %" PRIaPGNO,
container_of(head, MDBX_page, mp_data)->mp_pgno, data_page(head)->mp_pgno, mdbx_durable_str(head),
mdbx_durable_str(head), unsynced_pages); unsynced_pages);
MDBX_meta meta = *head; MDBX_meta meta = *head;
int err = mdbx_sync_locked(env, flags | MDBX_SHRINK_ALLOWED, &meta); int err = mdbx_sync_locked(env, flags | MDBX_SHRINK_ALLOWED, &meta);
if (unlikely(err != MDBX_SUCCESS)) { if (unlikely(err != MDBX_SUCCESS)) {
@ -6392,26 +6399,22 @@ fail:
/* Read the environment parameters of a DB environment /* Read the environment parameters of a DB environment
* before mapping it into memory. */ * before mapping it into memory. */
static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta, static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *dest,
uint64_t *filesize) { uint64_t *filesize) {
STATIC_ASSERT(offsetof(MDBX_page, mp_meta) == PAGEHDRSZ);
int rc = mdbx_filesize(env->me_fd, filesize); int rc = mdbx_filesize(env->me_fd, filesize);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
return rc; return rc;
memset(meta, 0, sizeof(MDBX_meta)); memset(dest, 0, sizeof(MDBX_meta));
meta->mm_datasync_sign = MDBX_DATASIGN_WEAK; dest->mm_datasync_sign = MDBX_DATASIGN_WEAK;
rc = MDBX_CORRUPTED; rc = MDBX_CORRUPTED;
/* Read twice all meta pages so we can find the latest one. */ /* Read twice all meta pages so we can find the latest one. */
unsigned loop_limit = NUM_METAS * 2; unsigned loop_limit = NUM_METAS * 2;
for (unsigned loop_count = 0; loop_count < loop_limit; ++loop_count) { for (unsigned loop_count = 0; loop_count < loop_limit; ++loop_count) {
MDBX_page page;
/* We don't know the page size on first time. /* We don't know the page size on first time.
* So, just guess it. */ * So, just guess it. */
unsigned guess_pagesize = meta->mm_psize; unsigned guess_pagesize = dest->mm_psize;
if (guess_pagesize == 0) if (guess_pagesize == 0)
guess_pagesize = guess_pagesize =
(loop_count > NUM_METAS) ? env->me_psize : env->me_os_psize; (loop_count > NUM_METAS) ? env->me_psize : env->me_os_psize;
@ -6419,52 +6422,55 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta,
const unsigned meta_number = loop_count % NUM_METAS; const unsigned meta_number = loop_count % NUM_METAS;
const unsigned offset = guess_pagesize * meta_number; const unsigned offset = guess_pagesize * meta_number;
char buffer[MIN_PAGESIZE];
unsigned retryleft = 42; unsigned retryleft = 42;
while (1) { while (1) {
mdbx_trace("reading meta[%d]: offset %u, bytes %u, retry-left %u", mdbx_trace("reading meta[%d]: offset %u, bytes %u, retry-left %u",
meta_number, offset, (unsigned)sizeof(page), retryleft); meta_number, offset, MIN_PAGESIZE, retryleft);
int err = mdbx_pread(env->me_fd, &page, sizeof(page), offset); int err = mdbx_pread(env->me_fd, buffer, MIN_PAGESIZE, offset);
if (err != MDBX_SUCCESS) { if (err != MDBX_SUCCESS) {
if (err == MDBX_ENODATA && offset == 0 && loop_count == 0 && if (err == MDBX_ENODATA && offset == 0 && loop_count == 0 &&
*filesize == 0 && (env->me_flags & MDBX_RDONLY) == 0) *filesize == 0 && (env->me_flags & MDBX_RDONLY) == 0)
mdbx_notice("read meta: empty file (%d, %s)", err, mdbx_notice("read meta: empty file (%d, %s)", err,
mdbx_strerror(err)); mdbx_strerror(err));
else else
mdbx_error("read meta[%u,%u]: %i, %s", offset, (unsigned)sizeof(page), mdbx_error("read meta[%u,%u]: %i, %s", offset, MIN_PAGESIZE, err,
err, mdbx_strerror(err)); mdbx_strerror(err));
return err; return err;
} }
MDBX_page again; char again[MIN_PAGESIZE];
err = mdbx_pread(env->me_fd, &again, sizeof(again), offset); err = mdbx_pread(env->me_fd, again, MIN_PAGESIZE, offset);
if (err != MDBX_SUCCESS) { if (err != MDBX_SUCCESS) {
mdbx_error("read meta[%u,%u]: %i, %s", offset, (unsigned)sizeof(again), mdbx_error("read meta[%u,%u]: %i, %s", offset, MIN_PAGESIZE, err,
err, mdbx_strerror(err)); mdbx_strerror(err));
return err; return err;
} }
if (memcmp(&page, &again, sizeof(page)) == 0 || --retryleft == 0) if (memcmp(buffer, again, MIN_PAGESIZE) == 0 || --retryleft == 0)
break; break;
mdbx_verbose("meta[%u] was updated, re-read it", meta_number); mdbx_verbose("meta[%u] was updated, re-read it", meta_number);
} }
if (page.mp_meta.mm_magic_and_version != MDBX_DATA_MAGIC && MDBX_page *const page = (MDBX_page *)buffer;
page.mp_meta.mm_magic_and_version != MDBX_DATA_MAGIC_DEVEL) { MDBX_meta *const meta = page_meta(page);
if (meta->mm_magic_and_version != MDBX_DATA_MAGIC &&
meta->mm_magic_and_version != MDBX_DATA_MAGIC_DEVEL) {
mdbx_error("meta[%u] has invalid magic/version %" PRIx64, meta_number, mdbx_error("meta[%u] has invalid magic/version %" PRIx64, meta_number,
page.mp_meta.mm_magic_and_version); meta->mm_magic_and_version);
return ((page.mp_meta.mm_magic_and_version >> 8) != MDBX_MAGIC) return ((meta->mm_magic_and_version >> 8) != MDBX_MAGIC)
? MDBX_INVALID ? MDBX_INVALID
: MDBX_VERSION_MISMATCH; : MDBX_VERSION_MISMATCH;
} }
if (page.mp_pgno != meta_number) { if (page->mp_pgno != meta_number) {
mdbx_error("meta[%u] has invalid pageno %" PRIaPGNO, meta_number, mdbx_error("meta[%u] has invalid pageno %" PRIaPGNO, meta_number,
page.mp_pgno); page->mp_pgno);
return MDBX_INVALID; return MDBX_INVALID;
} }
if (page.mp_flags != P_META) { if (page->mp_flags != P_META) {
mdbx_error("page #%u not a meta-page", meta_number); mdbx_error("page #%u not a meta-page", meta_number);
return MDBX_INVALID; return MDBX_INVALID;
} }
@ -6475,79 +6481,69 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta,
} }
/* LY: check pagesize */ /* LY: check pagesize */
if (!is_powerof2(page.mp_meta.mm_psize) || if (!is_powerof2(meta->mm_psize) || meta->mm_psize < MIN_PAGESIZE ||
page.mp_meta.mm_psize < MIN_PAGESIZE || meta->mm_psize > MAX_PAGESIZE) {
page.mp_meta.mm_psize > MAX_PAGESIZE) {
mdbx_notice("meta[%u] has invalid pagesize (%u), skip it", meta_number, mdbx_notice("meta[%u] has invalid pagesize (%u), skip it", meta_number,
page.mp_meta.mm_psize); meta->mm_psize);
rc = is_powerof2(page.mp_meta.mm_psize) ? MDBX_VERSION_MISMATCH rc = is_powerof2(meta->mm_psize) ? MDBX_VERSION_MISMATCH : MDBX_INVALID;
: MDBX_INVALID;
continue; continue;
} }
if (meta_number == 0 && guess_pagesize != page.mp_meta.mm_psize) { if (meta_number == 0 && guess_pagesize != meta->mm_psize) {
meta->mm_psize = page.mp_meta.mm_psize; dest->mm_psize = meta->mm_psize;
mdbx_verbose("meta[%u] took pagesize %u", meta_number, mdbx_verbose("meta[%u] took pagesize %u", meta_number, meta->mm_psize);
page.mp_meta.mm_psize);
} }
if (safe64_read(&page.mp_meta.mm_txnid_a) != if (safe64_read(&meta->mm_txnid_a) != safe64_read(&meta->mm_txnid_b)) {
safe64_read(&page.mp_meta.mm_txnid_b)) {
mdbx_warning("meta[%u] not completely updated, skip it", meta_number); mdbx_warning("meta[%u] not completely updated, skip it", meta_number);
continue; continue;
} }
/* LY: check signature as a checksum */ /* LY: check signature as a checksum */
if (META_IS_STEADY(&page.mp_meta) && if (META_IS_STEADY(meta) &&
page.mp_meta.mm_datasync_sign != mdbx_meta_sign(&page.mp_meta)) { meta->mm_datasync_sign != mdbx_meta_sign(meta)) {
mdbx_notice("meta[%u] has invalid steady-checksum (0x%" PRIx64 mdbx_notice("meta[%u] has invalid steady-checksum (0x%" PRIx64
" != 0x%" PRIx64 "), skip it", " != 0x%" PRIx64 "), skip it",
meta_number, page.mp_meta.mm_datasync_sign, meta_number, meta->mm_datasync_sign, mdbx_meta_sign(meta));
mdbx_meta_sign(&page.mp_meta));
continue; continue;
} }
mdbx_debug("read meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO mdbx_debug("read meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO
", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO
" +%u -%u, txn_id %" PRIaTXN ", %s", " +%u -%u, txn_id %" PRIaTXN ", %s",
page.mp_pgno, page.mp_meta.mm_dbs[MAIN_DBI].md_root, page->mp_pgno, meta->mm_dbs[MAIN_DBI].md_root,
page.mp_meta.mm_dbs[FREE_DBI].md_root, page.mp_meta.mm_geo.lower, meta->mm_dbs[FREE_DBI].md_root, meta->mm_geo.lower,
page.mp_meta.mm_geo.next, page.mp_meta.mm_geo.now, meta->mm_geo.next, meta->mm_geo.now, meta->mm_geo.upper,
page.mp_meta.mm_geo.upper, page.mp_meta.mm_geo.grow, meta->mm_geo.grow, meta->mm_geo.shrink,
page.mp_meta.mm_geo.shrink, page.mp_meta.mm_txnid_a.inconsistent, meta->mm_txnid_a.inconsistent, mdbx_durable_str(meta));
mdbx_durable_str(&page.mp_meta));
/* LY: check min-pages value */ /* LY: check min-pages value */
if (page.mp_meta.mm_geo.lower < MIN_PAGENO || if (meta->mm_geo.lower < MIN_PAGENO || meta->mm_geo.lower > MAX_PAGENO) {
page.mp_meta.mm_geo.lower > MAX_PAGENO) {
mdbx_notice("meta[%u] has invalid min-pages (%" PRIaPGNO "), skip it", mdbx_notice("meta[%u] has invalid min-pages (%" PRIaPGNO "), skip it",
meta_number, page.mp_meta.mm_geo.lower); meta_number, meta->mm_geo.lower);
rc = MDBX_INVALID; rc = MDBX_INVALID;
continue; continue;
} }
/* LY: check max-pages value */ /* LY: check max-pages value */
if (page.mp_meta.mm_geo.upper < MIN_PAGENO || if (meta->mm_geo.upper < MIN_PAGENO || meta->mm_geo.upper > MAX_PAGENO ||
page.mp_meta.mm_geo.upper > MAX_PAGENO || meta->mm_geo.upper < meta->mm_geo.lower) {
page.mp_meta.mm_geo.upper < page.mp_meta.mm_geo.lower) {
mdbx_notice("meta[%u] has invalid max-pages (%" PRIaPGNO "), skip it", mdbx_notice("meta[%u] has invalid max-pages (%" PRIaPGNO "), skip it",
meta_number, page.mp_meta.mm_geo.upper); meta_number, meta->mm_geo.upper);
rc = MDBX_INVALID; rc = MDBX_INVALID;
continue; continue;
} }
/* LY: check last_pgno */ /* LY: check last_pgno */
if (page.mp_meta.mm_geo.next < MIN_PAGENO || if (meta->mm_geo.next < MIN_PAGENO || meta->mm_geo.next - 1 > MAX_PAGENO) {
page.mp_meta.mm_geo.next - 1 > MAX_PAGENO) {
mdbx_notice("meta[%u] has invalid next-pageno (%" PRIaPGNO "), skip it", mdbx_notice("meta[%u] has invalid next-pageno (%" PRIaPGNO "), skip it",
meta_number, page.mp_meta.mm_geo.next); meta_number, meta->mm_geo.next);
rc = MDBX_CORRUPTED; rc = MDBX_CORRUPTED;
continue; continue;
} }
/* LY: check filesize & used_bytes */ /* LY: check filesize & used_bytes */
const uint64_t used_bytes = const uint64_t used_bytes = meta->mm_geo.next * (uint64_t)meta->mm_psize;
page.mp_meta.mm_geo.next * (uint64_t)page.mp_meta.mm_psize;
if (used_bytes > *filesize) { if (used_bytes > *filesize) {
/* Here could be a race with DB-shrinking performed by other process */ /* Here could be a race with DB-shrinking performed by other process */
rc = mdbx_filesize(env->me_fd, filesize); rc = mdbx_filesize(env->me_fd, filesize);
@ -6563,8 +6559,7 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta,
} }
/* LY: check mapsize limits */ /* LY: check mapsize limits */
const uint64_t mapsize_min = const uint64_t mapsize_min = meta->mm_geo.lower * (uint64_t)meta->mm_psize;
page.mp_meta.mm_geo.lower * (uint64_t)page.mp_meta.mm_psize;
STATIC_ASSERT(MAX_MAPSIZE < PTRDIFF_MAX - MAX_PAGESIZE); STATIC_ASSERT(MAX_MAPSIZE < PTRDIFF_MAX - MAX_PAGESIZE);
STATIC_ASSERT(MIN_MAPSIZE < MAX_MAPSIZE); STATIC_ASSERT(MIN_MAPSIZE < MAX_MAPSIZE);
if (mapsize_min < MIN_MAPSIZE || mapsize_min > MAX_MAPSIZE) { if (mapsize_min < MIN_MAPSIZE || mapsize_min > MAX_MAPSIZE) {
@ -6574,14 +6569,12 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta,
continue; continue;
} }
const uint64_t mapsize_max = const uint64_t mapsize_max = meta->mm_geo.upper * (uint64_t)meta->mm_psize;
page.mp_meta.mm_geo.upper * (uint64_t)page.mp_meta.mm_psize;
STATIC_ASSERT(MIN_MAPSIZE < MAX_MAPSIZE); STATIC_ASSERT(MIN_MAPSIZE < MAX_MAPSIZE);
if (mapsize_max > MAX_MAPSIZE || if (mapsize_max > MAX_MAPSIZE ||
MAX_PAGENO < roundup_powerof2((size_t)mapsize_max, env->me_os_psize) / MAX_PAGENO < roundup_powerof2((size_t)mapsize_max, env->me_os_psize) /
(size_t)page.mp_meta.mm_psize) { (size_t)meta->mm_psize) {
if (page.mp_meta.mm_geo.next - 1 > MAX_PAGENO || if (meta->mm_geo.next - 1 > MAX_PAGENO || used_bytes > MAX_MAPSIZE) {
used_bytes > MAX_MAPSIZE) {
mdbx_notice("meta[%u] has too large max-mapsize (%" PRIu64 "), skip it", mdbx_notice("meta[%u] has too large max-mapsize (%" PRIu64 "), skip it",
meta_number, mapsize_max); meta_number, mapsize_max);
rc = MDBX_TOO_LARGE; rc = MDBX_TOO_LARGE;
@ -6592,7 +6585,7 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta,
mdbx_notice("meta[%u] has too large max-mapsize (%" PRIu64 "), " mdbx_notice("meta[%u] has too large max-mapsize (%" PRIu64 "), "
"but size of used space still acceptable (%" PRIu64 ")", "but size of used space still acceptable (%" PRIu64 ")",
meta_number, mapsize_max, used_bytes); meta_number, mapsize_max, used_bytes);
page.mp_meta.mm_geo.upper = (pgno_t)(MAX_MAPSIZE / page.mp_meta.mm_psize); meta->mm_geo.upper = (pgno_t)(MAX_MAPSIZE / meta->mm_psize);
} }
/* LY: check and silently put mm_geo.now into [geo.lower...geo.upper]. /* LY: check and silently put mm_geo.now into [geo.lower...geo.upper].
@ -6602,72 +6595,70 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta,
* at all. This is not a problem as there is no damage or loss of data. * at all. This is not a problem as there is no damage or loss of data.
* Therefore it is better not to consider such situation as an error, but * Therefore it is better not to consider such situation as an error, but
* silently correct it. */ * silently correct it. */
if (page.mp_meta.mm_geo.now < page.mp_meta.mm_geo.lower) if (meta->mm_geo.now < meta->mm_geo.lower)
page.mp_meta.mm_geo.now = page.mp_meta.mm_geo.lower; meta->mm_geo.now = meta->mm_geo.lower;
if (page.mp_meta.mm_geo.now > page.mp_meta.mm_geo.upper) if (meta->mm_geo.now > meta->mm_geo.upper &&
page.mp_meta.mm_geo.now = page.mp_meta.mm_geo.upper; meta->mm_geo.next <= meta->mm_geo.upper)
meta->mm_geo.now = meta->mm_geo.upper;
if (page.mp_meta.mm_geo.next > page.mp_meta.mm_geo.now) { if (meta->mm_geo.next > meta->mm_geo.now) {
mdbx_notice("meta[%u] next-pageno (%" PRIaPGNO mdbx_notice("meta[%u] next-pageno (%" PRIaPGNO
") is beyond end-pgno (%" PRIaPGNO "), skip it", ") is beyond end-pgno (%" PRIaPGNO "), skip it",
meta_number, page.mp_meta.mm_geo.next, meta_number, meta->mm_geo.next, meta->mm_geo.now);
page.mp_meta.mm_geo.now);
rc = MDBX_CORRUPTED; rc = MDBX_CORRUPTED;
continue; continue;
} }
/* LY: GC root */ /* LY: GC root */
if (page.mp_meta.mm_dbs[FREE_DBI].md_root == P_INVALID) { if (meta->mm_dbs[FREE_DBI].md_root == P_INVALID) {
if (page.mp_meta.mm_dbs[FREE_DBI].md_branch_pages || if (meta->mm_dbs[FREE_DBI].md_branch_pages ||
page.mp_meta.mm_dbs[FREE_DBI].md_depth || meta->mm_dbs[FREE_DBI].md_depth ||
page.mp_meta.mm_dbs[FREE_DBI].md_entries || meta->mm_dbs[FREE_DBI].md_entries ||
page.mp_meta.mm_dbs[FREE_DBI].md_leaf_pages || meta->mm_dbs[FREE_DBI].md_leaf_pages ||
page.mp_meta.mm_dbs[FREE_DBI].md_overflow_pages) { meta->mm_dbs[FREE_DBI].md_overflow_pages) {
mdbx_notice("meta[%u] has false-empty GC, skip it", meta_number); mdbx_notice("meta[%u] has false-empty GC, skip it", meta_number);
rc = MDBX_CORRUPTED; rc = MDBX_CORRUPTED;
continue; continue;
} }
} else if (page.mp_meta.mm_dbs[FREE_DBI].md_root >= } else if (meta->mm_dbs[FREE_DBI].md_root >= meta->mm_geo.next) {
page.mp_meta.mm_geo.next) {
mdbx_notice("meta[%u] has invalid GC-root %" PRIaPGNO ", skip it", mdbx_notice("meta[%u] has invalid GC-root %" PRIaPGNO ", skip it",
meta_number, page.mp_meta.mm_dbs[FREE_DBI].md_root); meta_number, meta->mm_dbs[FREE_DBI].md_root);
rc = MDBX_CORRUPTED; rc = MDBX_CORRUPTED;
continue; continue;
} }
/* LY: MainDB root */ /* LY: MainDB root */
if (page.mp_meta.mm_dbs[MAIN_DBI].md_root == P_INVALID) { if (meta->mm_dbs[MAIN_DBI].md_root == P_INVALID) {
if (page.mp_meta.mm_dbs[MAIN_DBI].md_branch_pages || if (meta->mm_dbs[MAIN_DBI].md_branch_pages ||
page.mp_meta.mm_dbs[MAIN_DBI].md_depth || meta->mm_dbs[MAIN_DBI].md_depth ||
page.mp_meta.mm_dbs[MAIN_DBI].md_entries || meta->mm_dbs[MAIN_DBI].md_entries ||
page.mp_meta.mm_dbs[MAIN_DBI].md_leaf_pages || meta->mm_dbs[MAIN_DBI].md_leaf_pages ||
page.mp_meta.mm_dbs[MAIN_DBI].md_overflow_pages) { meta->mm_dbs[MAIN_DBI].md_overflow_pages) {
mdbx_notice("meta[%u] has false-empty maindb", meta_number); mdbx_notice("meta[%u] has false-empty maindb", meta_number);
rc = MDBX_CORRUPTED; rc = MDBX_CORRUPTED;
continue; continue;
} }
} else if (page.mp_meta.mm_dbs[MAIN_DBI].md_root >= } else if (meta->mm_dbs[MAIN_DBI].md_root >= meta->mm_geo.next) {
page.mp_meta.mm_geo.next) {
mdbx_notice("meta[%u] has invalid maindb-root %" PRIaPGNO ", skip it", mdbx_notice("meta[%u] has invalid maindb-root %" PRIaPGNO ", skip it",
meta_number, page.mp_meta.mm_dbs[MAIN_DBI].md_root); meta_number, meta->mm_dbs[MAIN_DBI].md_root);
rc = MDBX_CORRUPTED; rc = MDBX_CORRUPTED;
continue; continue;
} }
if (safe64_read(&page.mp_meta.mm_txnid_a) == 0) { if (safe64_read(&meta->mm_txnid_a) == 0) {
mdbx_warning("meta[%u] has zero txnid, skip it", meta_number); mdbx_warning("meta[%u] has zero txnid, skip it", meta_number);
continue; continue;
} }
if (mdbx_meta_ot(prefer_noweak, env, meta, &page.mp_meta)) { if (mdbx_meta_ot(prefer_noweak, env, dest, meta)) {
*meta = page.mp_meta; *dest = *meta;
if (META_IS_WEAK(meta)) if (META_IS_WEAK(dest))
loop_limit += 1; /* LY: should re-read to hush race with update */ loop_limit += 1; /* LY: should re-read to hush race with update */
mdbx_verbose("latch meta[%u]", meta_number); mdbx_verbose("latch meta[%u]", meta_number);
} }
} }
if (META_IS_WEAK(meta)) { if (META_IS_WEAK(dest)) {
mdbx_error("no usable meta-pages, database is corrupted"); mdbx_error("no usable meta-pages, database is corrupted");
return rc; return rc;
} }
@ -6689,49 +6680,49 @@ static MDBX_page *__cold mdbx_meta_model(const MDBX_env *env, MDBX_page *model,
memset(model, 0, sizeof(*model)); memset(model, 0, sizeof(*model));
model->mp_pgno = num; model->mp_pgno = num;
model->mp_flags = P_META; model->mp_flags = P_META;
model->mp_meta.mm_magic_and_version = MDBX_DATA_MAGIC; MDBX_meta *const model_meta = page_meta(model);
model_meta->mm_magic_and_version = MDBX_DATA_MAGIC;
model->mp_meta.mm_geo.lower = bytes2pgno(env, env->me_dbgeo.lower); model_meta->mm_geo.lower = bytes2pgno(env, env->me_dbgeo.lower);
model->mp_meta.mm_geo.upper = bytes2pgno(env, env->me_dbgeo.upper); model_meta->mm_geo.upper = bytes2pgno(env, env->me_dbgeo.upper);
model->mp_meta.mm_geo.grow = (uint16_t)bytes2pgno(env, env->me_dbgeo.grow); model_meta->mm_geo.grow = (uint16_t)bytes2pgno(env, env->me_dbgeo.grow);
model->mp_meta.mm_geo.shrink = model_meta->mm_geo.shrink = (uint16_t)bytes2pgno(env, env->me_dbgeo.shrink);
(uint16_t)bytes2pgno(env, env->me_dbgeo.shrink); model_meta->mm_geo.now = bytes2pgno(env, env->me_dbgeo.now);
model->mp_meta.mm_geo.now = bytes2pgno(env, env->me_dbgeo.now); model_meta->mm_geo.next = NUM_METAS;
model->mp_meta.mm_geo.next = NUM_METAS;
mdbx_ensure(env, model->mp_meta.mm_geo.lower >= MIN_PAGENO); mdbx_ensure(env, model_meta->mm_geo.lower >= MIN_PAGENO);
mdbx_ensure(env, model->mp_meta.mm_geo.upper <= MAX_PAGENO); mdbx_ensure(env, model_meta->mm_geo.upper <= MAX_PAGENO);
mdbx_ensure(env, model->mp_meta.mm_geo.now >= model->mp_meta.mm_geo.lower); mdbx_ensure(env, model_meta->mm_geo.now >= model_meta->mm_geo.lower);
mdbx_ensure(env, model->mp_meta.mm_geo.now <= model->mp_meta.mm_geo.upper); mdbx_ensure(env, model_meta->mm_geo.now <= model_meta->mm_geo.upper);
mdbx_ensure(env, model->mp_meta.mm_geo.next >= MIN_PAGENO); mdbx_ensure(env, model_meta->mm_geo.next >= MIN_PAGENO);
mdbx_ensure(env, model->mp_meta.mm_geo.next <= model->mp_meta.mm_geo.now); mdbx_ensure(env, model_meta->mm_geo.next <= model_meta->mm_geo.now);
mdbx_ensure(env, model->mp_meta.mm_geo.grow == mdbx_ensure(env,
bytes2pgno(env, env->me_dbgeo.grow)); model_meta->mm_geo.grow == bytes2pgno(env, env->me_dbgeo.grow));
mdbx_ensure(env, model->mp_meta.mm_geo.shrink == mdbx_ensure(env, model_meta->mm_geo.shrink ==
bytes2pgno(env, env->me_dbgeo.shrink)); bytes2pgno(env, env->me_dbgeo.shrink));
model->mp_meta.mm_psize = env->me_psize; model_meta->mm_psize = env->me_psize;
model->mp_meta.mm_flags = (uint16_t)env->me_flags; model_meta->mm_flags = (uint16_t)env->me_flags;
model->mp_meta.mm_flags |= model_meta->mm_flags |=
MDBX_INTEGERKEY; /* this is mm_dbs[FREE_DBI].md_flags */ MDBX_INTEGERKEY; /* this is mm_dbs[FREE_DBI].md_flags */
model->mp_meta.mm_dbs[FREE_DBI].md_root = P_INVALID; model_meta->mm_dbs[FREE_DBI].md_root = P_INVALID;
model->mp_meta.mm_dbs[MAIN_DBI].md_root = P_INVALID; model_meta->mm_dbs[MAIN_DBI].md_root = P_INVALID;
mdbx_meta_set_txnid(env, &model->mp_meta, MIN_TXNID + num); mdbx_meta_set_txnid(env, model_meta, MIN_TXNID + num);
model->mp_meta.mm_datasync_sign = mdbx_meta_sign(&model->mp_meta); model_meta->mm_datasync_sign = mdbx_meta_sign(model_meta);
return (MDBX_page *)((uint8_t *)model + env->me_psize); return (MDBX_page *)((uint8_t *)model + env->me_psize);
} }
/* Fill in most of the zeroed meta-pages for an empty database environment. /* Fill in most of the zeroed meta-pages for an empty database environment.
* Return pointer to the most recent (head) meta-page. */ * Return pointer to the most recent (head) meta-page. */
static MDBX_page *__cold mdbx_init_metas(const MDBX_env *env, void *buffer) { static MDBX_meta *__cold mdbx_init_metas(const MDBX_env *env, void *buffer) {
MDBX_page *page0 = (MDBX_page *)buffer; MDBX_page *page0 = (MDBX_page *)buffer;
MDBX_page *page1 = mdbx_meta_model(env, page0, 0); MDBX_page *page1 = mdbx_meta_model(env, page0, 0);
MDBX_page *page2 = mdbx_meta_model(env, page1, 1); MDBX_page *page2 = mdbx_meta_model(env, page1, 1);
mdbx_meta_model(env, page2, 2); mdbx_meta_model(env, page2, 2);
mdbx_assert(env, !mdbx_meta_eq(env, &page0->mp_meta, &page1->mp_meta)); mdbx_assert(env, !mdbx_meta_eq(env, page_meta(page0), page_meta(page1)));
mdbx_assert(env, !mdbx_meta_eq(env, &page1->mp_meta, &page2->mp_meta)); mdbx_assert(env, !mdbx_meta_eq(env, page_meta(page1), page_meta(page2)));
mdbx_assert(env, !mdbx_meta_eq(env, &page2->mp_meta, &page0->mp_meta)); mdbx_assert(env, !mdbx_meta_eq(env, page_meta(page2), page_meta(page0)));
return page2; return page_meta(page2);
} }
static int mdbx_sync_locked(MDBX_env *env, unsigned flags, static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
@ -6895,8 +6886,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
mdbx_debug("writing meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO mdbx_debug("writing meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO
", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO
" +%u -%u, txn_id %" PRIaTXN ", %s", " +%u -%u, txn_id %" PRIaTXN ", %s",
container_of(target, MDBX_page, mp_data)->mp_pgno, data_page(target)->mp_pgno, pending->mm_dbs[MAIN_DBI].md_root,
pending->mm_dbs[MAIN_DBI].md_root,
pending->mm_dbs[FREE_DBI].md_root, pending->mm_geo.lower, pending->mm_dbs[FREE_DBI].md_root, pending->mm_geo.lower,
pending->mm_geo.next, pending->mm_geo.now, pending->mm_geo.upper, pending->mm_geo.next, pending->mm_geo.now, pending->mm_geo.upper,
pending->mm_geo.grow, pending->mm_geo.shrink, pending->mm_geo.grow, pending->mm_geo.shrink,
@ -6988,9 +6978,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
if ((flags & (MDBX_NOSYNC | MDBX_NOMETASYNC)) == 0) { if ((flags & (MDBX_NOSYNC | MDBX_NOMETASYNC)) == 0) {
mdbx_assert(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0); mdbx_assert(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0);
if (flags & MDBX_WRITEMAP) { if (flags & MDBX_WRITEMAP) {
const size_t offset = const size_t offset = (uint8_t *)data_page(head) - env->me_dxb_mmap.dxb;
((uint8_t *)container_of(head, MDBX_page, mp_meta)) -
env->me_dxb_mmap.dxb;
const size_t paged_offset = offset & ~(env->me_os_psize - 1); const size_t paged_offset = offset & ~(env->me_os_psize - 1);
const size_t paged_length = roundup_powerof2( const size_t paged_length = roundup_powerof2(
env->me_psize + offset - paged_offset, env->me_os_psize); env->me_psize + offset - paged_offset, env->me_os_psize);
@ -7038,7 +7026,7 @@ int __cold mdbx_env_get_maxkeysize(MDBX_env *env) {
static void __cold mdbx_setup_pagesize(MDBX_env *env, const size_t pagesize) { static void __cold mdbx_setup_pagesize(MDBX_env *env, const size_t pagesize) {
STATIC_ASSERT(PTRDIFF_MAX > MAX_MAPSIZE); STATIC_ASSERT(PTRDIFF_MAX > MAX_MAPSIZE);
STATIC_ASSERT(MIN_PAGESIZE > sizeof(MDBX_page)); STATIC_ASSERT(MIN_PAGESIZE > sizeof(MDBX_page) + sizeof(MDBX_meta));
mdbx_ensure(env, is_powerof2(pagesize)); mdbx_ensure(env, is_powerof2(pagesize));
mdbx_ensure(env, pagesize >= MIN_PAGESIZE); mdbx_ensure(env, pagesize >= MIN_PAGESIZE);
mdbx_ensure(env, pagesize <= MAX_PAGESIZE); mdbx_ensure(env, pagesize <= MAX_PAGESIZE);
@ -7613,7 +7601,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
if (!buffer) if (!buffer)
return MDBX_ENOMEM; return MDBX_ENOMEM;
meta = mdbx_init_metas(env, buffer)->mp_meta; meta = *mdbx_init_metas(env, buffer);
err = mdbx_pwrite(env->me_fd, buffer, env->me_psize * NUM_METAS, 0); err = mdbx_pwrite(env->me_fd, buffer, env->me_psize * NUM_METAS, 0);
mdbx_free(buffer); mdbx_free(buffer);
if (unlikely(err != MDBX_SUCCESS)) if (unlikely(err != MDBX_SUCCESS))
@ -7824,9 +7812,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
head->mm_txnid_a.inconsistent = undo_txnid; head->mm_txnid_a.inconsistent = undo_txnid;
head->mm_datasync_sign = MDBX_DATASIGN_WEAK; head->mm_datasync_sign = MDBX_DATASIGN_WEAK;
head->mm_txnid_b.inconsistent = undo_txnid; head->mm_txnid_b.inconsistent = undo_txnid;
const size_t offset = const size_t offset = (uint8_t *)data_page(head) - env->me_dxb_mmap.dxb;
((uint8_t *)container_of(head, MDBX_page, mp_meta)) -
env->me_dxb_mmap.dxb;
const size_t paged_offset = offset & ~(env->me_os_psize - 1); const size_t paged_offset = offset & ~(env->me_os_psize - 1);
const size_t paged_length = roundup_powerof2( const size_t paged_length = roundup_powerof2(
env->me_psize + offset - paged_offset, env->me_os_psize); env->me_psize + offset - paged_offset, env->me_os_psize);
@ -8015,8 +8001,8 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
goto bailout; goto bailout;
if (lck_seize_rc == MDBX_RESULT_TRUE) { if (lck_seize_rc == MDBX_RESULT_TRUE) {
uint64_t wanna = roundup_powerof2( uint64_t wanna = roundup_powerof2(env->me_maxreaders * sizeof(MDBX_reader) +
(env->me_maxreaders - 1) * sizeof(MDBX_reader) + sizeof(MDBX_lockinfo), sizeof(MDBX_lockinfo),
env->me_os_psize); env->me_os_psize);
#ifndef NDEBUG #ifndef NDEBUG
err = mdbx_ftruncate(env->me_lfd, size = 0); err = mdbx_ftruncate(env->me_lfd, size = 0);
@ -8045,8 +8031,8 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
} }
const size_t maxreaders = const size_t maxreaders =
((size_t)size - sizeof(MDBX_lockinfo)) / sizeof(MDBX_reader) + 1; ((size_t)size - sizeof(MDBX_lockinfo)) / sizeof(MDBX_reader);
if (maxreaders < 2 || maxreaders > MDBX_READERS_LIMIT) { if (size > 65536 || maxreaders < 2 || maxreaders > MDBX_READERS_LIMIT) {
mdbx_error("lck-size too big (up to %" PRIuPTR " readers)", maxreaders); mdbx_error("lck-size too big (up to %" PRIuPTR " readers)", maxreaders);
err = MDBX_PROBLEM; err = MDBX_PROBLEM;
goto bailout; goto bailout;
@ -8424,8 +8410,7 @@ int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
mdbx_debug("opened database version %u, pagesize %u", mdbx_debug("opened database version %u, pagesize %u",
(uint8_t)meta->mm_magic_and_version, env->me_psize); (uint8_t)meta->mm_magic_and_version, env->me_psize);
mdbx_debug("using meta page %" PRIaPGNO ", txn %" PRIaTXN, mdbx_debug("using meta page %" PRIaPGNO ", txn %" PRIaTXN,
container_of(meta, MDBX_page, mp_data)->mp_pgno, data_page(meta)->mp_pgno, mdbx_meta_txnid_fluid(env, meta));
mdbx_meta_txnid_fluid(env, meta));
mdbx_debug("depth: %u", db->md_depth); mdbx_debug("depth: %u", db->md_depth);
mdbx_debug("entries: %" PRIu64, db->md_entries); mdbx_debug("entries: %" PRIu64, db->md_entries);
mdbx_debug("branch pages: %" PRIaPGNO, db->md_branch_pages); mdbx_debug("branch pages: %" PRIaPGNO, db->md_branch_pages);
@ -13428,34 +13413,31 @@ done:
return rc; return rc;
} }
/* compact_fixup_meta(): finalize the geometry and signature of the meta
 * record that the compacting copy is about to write.
 *
 * This region is a side-by-side diff: on each line the left text is the
 * pre-change code (takes an MDBX_page* and reaches the meta through mp_meta),
 * the right text is the post-change code (takes an MDBX_meta* directly and is
 * marked __cold). Both columns perform the same steps:
 *   1. If mm_geo.next is above mm_geo.now, set now to next advanced by
 *      (grow - next % grow) via pgno_add(), then passed through
 *      pgno_align2os_pgno().
 *   2. Else if next is below now, set now to next, then to
 *      (next + aligner - next % aligner) passed through pgno_align2os_pgno(),
 *      where aligner is grow when non-zero and shrink otherwise.
 *   3. Clamp now into [mm_geo.lower, mm_geo.upper].
 *   4. Assert now >= next and store mdbx_meta_sign() of the meta into
 *      mm_datasync_sign.
 *
 * NOTE(review): step 1 evaluates `next % grow` with no visible zero check, and
 * step 2 evaluates `next % aligner` where aligner may be shrink -- confirm
 * that callers guarantee these are non-zero. */
static void compact_fixup_meta(MDBX_env *env, MDBX_page *meta) { static __cold void compact_fixup_meta(MDBX_env *env, MDBX_meta *meta) {
/* Calculate filesize taking into account shrink/growing thresholds */ /* Calculate filesize taking into account shrink/growing thresholds */
if (meta->mp_meta.mm_geo.next > meta->mp_meta.mm_geo.now) { if (meta->mm_geo.next > meta->mm_geo.now) {
const pgno_t aligned = const pgno_t aligned = pgno_align2os_pgno(
pgno_align2os_pgno(env, pgno_add(meta->mp_meta.mm_geo.next, env,
meta->mp_meta.mm_geo.grow - pgno_add(meta->mm_geo.next,
meta->mp_meta.mm_geo.next % meta->mm_geo.grow - meta->mm_geo.next % meta->mm_geo.grow));
meta->mp_meta.mm_geo.grow)); meta->mm_geo.now = aligned;
meta->mp_meta.mm_geo.now = aligned; } else if (meta->mm_geo.next < meta->mm_geo.now) {
} else if (meta->mp_meta.mm_geo.next < meta->mp_meta.mm_geo.now) { meta->mm_geo.now = meta->mm_geo.next;
meta->mp_meta.mm_geo.now = meta->mp_meta.mm_geo.next; const pgno_t aligner =
const pgno_t aligner = meta->mp_meta.mm_geo.grow meta->mm_geo.grow ? meta->mm_geo.grow : meta->mm_geo.shrink;
? meta->mp_meta.mm_geo.grow const pgno_t aligned = pgno_align2os_pgno(
: meta->mp_meta.mm_geo.shrink; env, meta->mm_geo.next + aligner - meta->mm_geo.next % aligner);
const pgno_t aligned = meta->mm_geo.now = aligned;
pgno_align2os_pgno(env, meta->mp_meta.mm_geo.next + aligner -
meta->mp_meta.mm_geo.next % aligner);
meta->mp_meta.mm_geo.now = aligned;
} }
if (meta->mp_meta.mm_geo.now < meta->mp_meta.mm_geo.lower) if (meta->mm_geo.now < meta->mm_geo.lower)
meta->mp_meta.mm_geo.now = meta->mp_meta.mm_geo.lower; meta->mm_geo.now = meta->mm_geo.lower;
if (meta->mp_meta.mm_geo.now > meta->mp_meta.mm_geo.upper) if (meta->mm_geo.now > meta->mm_geo.upper)
meta->mp_meta.mm_geo.now = meta->mp_meta.mm_geo.upper; meta->mm_geo.now = meta->mm_geo.upper;
/* Update signature */ /* Update signature */
assert(meta->mp_meta.mm_geo.now >= meta->mp_meta.mm_geo.next); assert(meta->mm_geo.now >= meta->mm_geo.next);
meta->mp_meta.mm_datasync_sign = mdbx_meta_sign(&meta->mp_meta); meta->mm_datasync_sign = mdbx_meta_sign(meta);
} }
/* Copy environment with compaction. */ /* Copy environment with compaction. */
@ -13463,12 +13445,13 @@ static int __cold mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn,
mdbx_filehandle_t fd, uint8_t *buffer, mdbx_filehandle_t fd, uint8_t *buffer,
const bool dest_is_pipe) { const bool dest_is_pipe) {
const size_t meta_bytes = pgno2bytes(env, NUM_METAS); const size_t meta_bytes = pgno2bytes(env, NUM_METAS);
uint8_t *const data_buffer = buffer + meta_bytes; uint8_t *const data_buffer =
MDBX_page *const meta = mdbx_init_metas(env, buffer); buffer + roundup_powerof2(meta_bytes, env->me_os_psize);
MDBX_meta *const meta = mdbx_init_metas(env, buffer);
/* copy canary sequences if present */ /* copy canary sequences if present */
if (read_txn->mt_canary.v) { if (read_txn->mt_canary.v) {
meta->mp_meta.mm_canary = read_txn->mt_canary; meta->mm_canary = read_txn->mt_canary;
meta->mp_meta.mm_canary.v = mdbx_meta_txnid_stable(env, &meta->mp_meta); meta->mm_canary.v = mdbx_meta_txnid_stable(env, meta);
} }
/* Set metapage 1 with current main DB */ /* Set metapage 1 with current main DB */
@ -13476,8 +13459,7 @@ static int __cold mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn,
if ((new_root = root) == P_INVALID) { if ((new_root = root) == P_INVALID) {
/* When the DB is empty, handle it specially to /* When the DB is empty, handle it specially to
* fix any breakage like page leaks from ITS#8174. */ * fix any breakage like page leaks from ITS#8174. */
meta->mp_meta.mm_dbs[MAIN_DBI].md_flags = meta->mm_dbs[MAIN_DBI].md_flags = read_txn->mt_dbs[MAIN_DBI].md_flags;
read_txn->mt_dbs[MAIN_DBI].md_flags;
compact_fixup_meta(env, meta); compact_fixup_meta(env, meta);
if (dest_is_pipe) { if (dest_is_pipe) {
int rc = mdbx_write(fd, buffer, meta_bytes); int rc = mdbx_write(fd, buffer, meta_bytes);
@ -13504,9 +13486,9 @@ static int __cold mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn,
read_txn->mt_dbs[FREE_DBI].md_overflow_pages; read_txn->mt_dbs[FREE_DBI].md_overflow_pages;
new_root = read_txn->mt_next_pgno - 1 - freecount; new_root = read_txn->mt_next_pgno - 1 - freecount;
meta->mp_meta.mm_geo.next = new_root + 1; meta->mm_geo.next = new_root + 1;
meta->mp_meta.mm_dbs[MAIN_DBI] = read_txn->mt_dbs[MAIN_DBI]; meta->mm_dbs[MAIN_DBI] = read_txn->mt_dbs[MAIN_DBI];
meta->mp_meta.mm_dbs[MAIN_DBI].md_root = new_root; meta->mm_dbs[MAIN_DBI].md_root = new_root;
mdbx_copy ctx; mdbx_copy ctx;
memset(&ctx, 0, sizeof(ctx)); memset(&ctx, 0, sizeof(ctx));
@ -13562,20 +13544,20 @@ static int __cold mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn,
" LT expected %" PRIaPGNO " (page leak(s) in source DB)", " LT expected %" PRIaPGNO " (page leak(s) in source DB)",
root, new_root); root, new_root);
/* fixup meta */ /* fixup meta */
meta->mp_meta.mm_dbs[MAIN_DBI].md_root = root; meta->mm_dbs[MAIN_DBI].md_root = root;
meta->mp_meta.mm_geo.next = root + 1; meta->mm_geo.next = root + 1;
} }
compact_fixup_meta(env, meta); compact_fixup_meta(env, meta);
} }
} }
/* Extend file if required */ /* Extend file if required */
if (meta->mp_meta.mm_geo.now != meta->mp_meta.mm_geo.next) { if (meta->mm_geo.now != meta->mm_geo.next) {
const size_t whole_size = pgno2bytes(env, meta->mp_meta.mm_geo.now); const size_t whole_size = pgno2bytes(env, meta->mm_geo.now);
if (!dest_is_pipe) if (!dest_is_pipe)
return mdbx_ftruncate(fd, whole_size); return mdbx_ftruncate(fd, whole_size);
const size_t used_size = pgno2bytes(env, meta->mp_meta.mm_geo.next); const size_t used_size = pgno2bytes(env, meta->mm_geo.next);
memset(data_buffer, 0, MDBX_WBUF); memset(data_buffer, 0, MDBX_WBUF);
for (size_t offset = used_size; offset < whole_size;) { for (size_t offset = used_size; offset < whole_size;) {
const size_t chunk = const size_t chunk =
@ -13630,7 +13612,8 @@ static int __cold mdbx_env_copy_asis(MDBX_env *env, MDBX_txn *read_txn,
if (dest_is_pipe) if (dest_is_pipe)
rc = mdbx_write(fd, buffer, meta_bytes); rc = mdbx_write(fd, buffer, meta_bytes);
uint8_t *const data_buffer = buffer + meta_bytes; uint8_t *const data_buffer =
buffer + roundup_powerof2(meta_bytes, env->me_os_psize);
for (size_t offset = meta_bytes; rc == MDBX_SUCCESS && offset < used_size;) { for (size_t offset = meta_bytes; rc == MDBX_SUCCESS && offset < used_size;) {
if (dest_is_pipe) { if (dest_is_pipe) {
#if defined(__linux__) || defined(__gnu_linux__) #if defined(__linux__) || defined(__gnu_linux__)
@ -13705,9 +13688,9 @@ int __cold mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd,
return rc; return rc;
} }
const size_t buffer_size = roundup_powerof2( const size_t buffer_size =
pgno2bytes(env, NUM_METAS) + roundup_powerof2(pgno2bytes(env, NUM_METAS), env->me_os_psize) +
((flags & MDBX_CP_COMPACT) ? MDBX_WBUF * 2 : MDBX_WBUF), roundup_powerof2(((flags & MDBX_CP_COMPACT) ? MDBX_WBUF * 2 : MDBX_WBUF),
env->me_os_psize); env->me_os_psize);
uint8_t *buffer = NULL; uint8_t *buffer = NULL;

View File

@ -467,11 +467,7 @@ typedef struct MDBX_page {
pgno_t mp_pgno; /* page number */ pgno_t mp_pgno; /* page number */
/* dynamic size */ /* dynamic size */
union { indx_t mp_ptrs[/* C99 */];
indx_t mp_ptrs[1];
MDBX_meta mp_meta;
uint8_t mp_data[1];
};
} MDBX_page; } MDBX_page;
/* Size of the page header, excluding dynamic data at the end */ /* Size of the page header, excluding dynamic data at the end */
@ -629,7 +625,7 @@ typedef struct MDBX_lockinfo {
volatile unsigned mti_readers_refresh_flag; volatile unsigned mti_readers_refresh_flag;
alignas(MDBX_CACHELINE_SIZE) /* cacheline ---------------------------------*/ alignas(MDBX_CACHELINE_SIZE) /* cacheline ---------------------------------*/
MDBX_reader mti_readers[1]; MDBX_reader mti_readers[/* C99 */];
} MDBX_lockinfo; } MDBX_lockinfo;
/* Lockfile format signature: version, features and field layout */ /* Lockfile format signature: version, features and field layout */
@ -637,7 +633,8 @@ typedef struct MDBX_lockinfo {
(MDBX_OSAL_LOCK_SIGN * 27733 + (unsigned)sizeof(MDBX_reader) * 13 + \ (MDBX_OSAL_LOCK_SIGN * 27733 + (unsigned)sizeof(MDBX_reader) * 13 + \
(unsigned)offsetof(MDBX_reader, mr_snapshot_pages_used) * 251 + \ (unsigned)offsetof(MDBX_reader, mr_snapshot_pages_used) * 251 + \
(unsigned)offsetof(MDBX_lockinfo, mti_oldest_reader) * 83 + \ (unsigned)offsetof(MDBX_lockinfo, mti_oldest_reader) * 83 + \
(unsigned)offsetof(MDBX_lockinfo, mti_numreaders) * 29) (unsigned)offsetof(MDBX_lockinfo, mti_numreaders) * 37 + \
(unsigned)offsetof(MDBX_lockinfo, mti_readers) * 29)
#define MDBX_DATA_MAGIC ((MDBX_MAGIC << 8) + MDBX_DATA_VERSION) #define MDBX_DATA_MAGIC ((MDBX_MAGIC << 8) + MDBX_DATA_VERSION)
#define MDBX_DATA_MAGIC_DEVEL ((MDBX_MAGIC << 8) + 255) #define MDBX_DATA_MAGIC_DEVEL ((MDBX_MAGIC << 8) + 255)
@ -676,7 +673,7 @@ typedef struct MDBX_lockinfo {
#if MDBX_WORDBITS >= 64 #if MDBX_WORDBITS >= 64
#define MAX_MAPSIZE MAX_MAPSIZE64 #define MAX_MAPSIZE MAX_MAPSIZE64
#define MDBX_READERS_LIMIT \ #define MDBX_READERS_LIMIT \
((65536 - sizeof(MDBX_lockinfo)) / sizeof(MDBX_reader) + 1) ((65536 - sizeof(MDBX_lockinfo)) / sizeof(MDBX_reader))
#else #else
#define MDBX_READERS_LIMIT 1024 #define MDBX_READERS_LIMIT 1024
#define MAX_MAPSIZE MAX_MAPSIZE32 #define MAX_MAPSIZE MAX_MAPSIZE32
@ -779,7 +776,7 @@ struct MDBX_txn {
#define mt_end_pgno mt_geo.now #define mt_end_pgno mt_geo.now
/* Transaction Flags */ /* Transaction Flags */
/* mdbx_txn_begin() flags */ /* mdbx_txn_begin() flags */
#define MDBX_TXN_BEGIN_FLAGS \ #define MDBX_TXN_BEGIN_FLAGS \
(MDBX_NOMETASYNC | MDBX_NOSYNC | MDBX_MAPASYNC | MDBX_RDONLY | MDBX_TRYTXN) (MDBX_NOMETASYNC | MDBX_NOSYNC | MDBX_MAPASYNC | MDBX_RDONLY | MDBX_TRYTXN)
/* internal txn flags */ /* internal txn flags */
@ -788,7 +785,7 @@ struct MDBX_txn {
#define MDBX_TXN_DIRTY 0x04 /* must write, even if dirty list is empty */ #define MDBX_TXN_DIRTY 0x04 /* must write, even if dirty list is empty */
#define MDBX_TXN_SPILLS 0x08 /* txn or a parent has spilled pages */ #define MDBX_TXN_SPILLS 0x08 /* txn or a parent has spilled pages */
#define MDBX_TXN_HAS_CHILD 0x10 /* txn has an MDBX_txn.mt_child */ #define MDBX_TXN_HAS_CHILD 0x10 /* txn has an MDBX_txn.mt_child */
/* most operations on the txn are currently illegal */ /* most operations on the txn are currently illegal */
#define MDBX_TXN_BLOCKED \ #define MDBX_TXN_BLOCKED \
(MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_HAS_CHILD) (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_HAS_CHILD)
unsigned mt_flags; unsigned mt_flags;
@ -804,7 +801,7 @@ struct MDBX_txn {
/* Array of sequence numbers for each DB handle */ /* Array of sequence numbers for each DB handle */
unsigned *mt_dbiseqs; unsigned *mt_dbiseqs;
/* Transaction DB Flags */ /* Transaction DB Flags */
#define DB_DIRTY MDBX_TBL_DIRTY /* DB was written in this txn */ #define DB_DIRTY MDBX_TBL_DIRTY /* DB was written in this txn */
#define DB_STALE MDBX_TBL_STALE /* Named-DB record is older than txnID */ #define DB_STALE MDBX_TBL_STALE /* Named-DB record is older than txnID */
#define DB_FRESH MDBX_TBL_FRESH /* Named-DB handle opened in this txn */ #define DB_FRESH MDBX_TBL_FRESH /* Named-DB handle opened in this txn */
@ -948,13 +945,13 @@ struct MDBX_env {
#define me_lfd me_lck_mmap.fd #define me_lfd me_lck_mmap.fd
#define me_lck me_lck_mmap.lck #define me_lck me_lck_mmap.lck
/* Failed to update the meta page. Probably an I/O error. */ /* Failed to update the meta page. Probably an I/O error. */
#define MDBX_FATAL_ERROR UINT32_C(0x80000000) #define MDBX_FATAL_ERROR UINT32_C(0x80000000)
/* Additional flag for mdbx_sync_locked() */ /* Additional flag for mdbx_sync_locked() */
#define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000) #define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
/* Some fields are initialized. */ /* Some fields are initialized. */
#define MDBX_ENV_ACTIVE UINT32_C(0x20000000) #define MDBX_ENV_ACTIVE UINT32_C(0x20000000)
/* me_txkey is set */ /* me_txkey is set */
#define MDBX_ENV_TXKEY UINT32_C(0x10000000) #define MDBX_ENV_TXKEY UINT32_C(0x10000000)
uint32_t me_flags; /* see mdbx_env */ uint32_t me_flags; /* see mdbx_env */
unsigned me_psize; /* DB page size, inited from me_os_psize */ unsigned me_psize; /* DB page size, inited from me_os_psize */
@ -1231,7 +1228,7 @@ MDBX_INTERNAL_FUNC void mdbx_rthc_thread_dtor(void *ptr);
/* Default size of memory map. /* Default size of memory map.
* This is certainly too small for any actual applications. Apps should * This is certainly too small for any actual applications. Apps should
* always set the size explicitly using mdbx_env_set_mapsize(). */ * always set the size explicitly using mdbx_env_set_mapsize(). */
#define DEFAULT_MAPSIZE 1048576 #define DEFAULT_MAPSIZE MEGABYTE
/* Number of slots in the reader table. /* Number of slots in the reader table.
* This value was chosen somewhat arbitrarily. The 61 is a prime number, * This value was chosen somewhat arbitrarily. The 61 is a prime number,
@ -1290,12 +1287,12 @@ typedef struct MDBX_node {
}; };
#endif #endif
/* mdbx_node Flags */ /* mdbx_node Flags */
#define F_BIGDATA 0x01 /* data put on overflow page */ #define F_BIGDATA 0x01 /* data put on overflow page */
#define F_SUBDATA 0x02 /* data is a sub-database */ #define F_SUBDATA 0x02 /* data is a sub-database */
#define F_DUPDATA 0x04 /* data has duplicates */ #define F_DUPDATA 0x04 /* data has duplicates */
/* valid flags for mdbx_node_add() */ /* valid flags for mdbx_node_add() */
#define NODE_ADD_FLAGS (F_DUPDATA | F_SUBDATA | MDBX_RESERVE | MDBX_APPEND) #define NODE_ADD_FLAGS (F_DUPDATA | F_SUBDATA | MDBX_RESERVE | MDBX_APPEND)
uint8_t mn_data[/* C99 */]; /* key and data are appended here */ uint8_t mn_data[/* C99 */]; /* key and data are appended here */
} MDBX_node; } MDBX_node;