mdbx: add and use ptr_disp() and ptr_dist().

The goal is to reduce pointer-type casts and the potential load that pointer aliasing puts on the optimizer/code generator.
Леонид Юрьев (Leonid Yuriev) 2022-11-29 01:10:44 +03:00
parent d6b9a71825
commit 01a39e7dc2
4 changed files with 248 additions and 253 deletions
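
For reference, the two helpers themselves are defined elsewhere in the patch and are not part of the excerpt below. A minimal sketch of what such byte-displacement/byte-distance wrappers look like (the names follow the commit; the exact attributes, qualifiers and placement are assumptions):

#include <stddef.h>

/* Sketch only: displace a pointer by a signed number of bytes. Returning
 * void * lets the caller assign the result to the target pointer type
 * without an explicit cast at every call site. */
static inline void *ptr_disp(const void *ptr, ptrdiff_t disp) {
  return (char *)ptr + disp;
}

/* Sketch only: byte distance between two pointers into the same object. */
static inline ptrdiff_t ptr_dist(const void *more, const void *less) {
  return (char *)more - (char *)less;
}

With such helpers an expression like (char *)mp + PAGEHDRSZ becomes ptr_disp(mp, PAGEHDRSZ), and (uint8_t *)page - env->me_map becomes ptr_dist(page, env->me_map), which is exactly the pattern of the hunks below.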


@@ -260,24 +260,24 @@ static __always_inline void unaligned_poke_u64(const size_t expected_alignment,
 }
 #define UNALIGNED_PEEK_8(ptr, struct, field) \
-peek_u8((const uint8_t *)(ptr) + offsetof(struct, field))
+peek_u8(ptr_disp(ptr, offsetof(struct, field)))
 #define UNALIGNED_POKE_8(ptr, struct, field, value) \
-poke_u8((uint8_t *)(ptr) + offsetof(struct, field), value)
+poke_u8(ptr_disp(ptr, offsetof(struct, field)), value)
 #define UNALIGNED_PEEK_16(ptr, struct, field) \
-unaligned_peek_u16(1, (const char *)(ptr) + offsetof(struct, field))
+unaligned_peek_u16(1, ptr_disp(ptr, offsetof(struct, field)))
 #define UNALIGNED_POKE_16(ptr, struct, field, value) \
-unaligned_poke_u16(1, (char *)(ptr) + offsetof(struct, field), value)
+unaligned_poke_u16(1, ptr_disp(ptr, offsetof(struct, field)), value)
 #define UNALIGNED_PEEK_32(ptr, struct, field) \
-unaligned_peek_u32(1, (const char *)(ptr) + offsetof(struct, field))
+unaligned_peek_u32(1, ptr_disp(ptr, offsetof(struct, field)))
 #define UNALIGNED_POKE_32(ptr, struct, field, value) \
-unaligned_poke_u32(1, (char *)(ptr) + offsetof(struct, field), value)
+unaligned_poke_u32(1, ptr_disp(ptr, offsetof(struct, field)), value)
 #define UNALIGNED_PEEK_64(ptr, struct, field) \
-unaligned_peek_u64(1, (const char *)(ptr) + offsetof(struct, field))
+unaligned_peek_u64(1, ptr_disp(ptr, offsetof(struct, field)))
 #define UNALIGNED_POKE_64(ptr, struct, field, value) \
-unaligned_poke_u64(1, (char *)(ptr) + offsetof(struct, field), value)
+unaligned_poke_u64(1, ptr_disp(ptr, offsetof(struct, field)), value)
 /* Get the page number pointed to by a branch node */
 MDBX_NOTHROW_PURE_FUNCTION static __always_inline pgno_t
@@ -341,13 +341,13 @@ static __always_inline void node_set_flags(MDBX_node *const __restrict node,
 /* Address of the key for the node */
 MDBX_NOTHROW_PURE_FUNCTION static __always_inline void *
 node_key(const MDBX_node *const __restrict node) {
-return (char *)node + NODESIZE;
+return ptr_disp(node, NODESIZE);
 }
 /* Address of the data for a node */
 MDBX_NOTHROW_PURE_FUNCTION static __always_inline void *
 node_data(const MDBX_node *const __restrict node) {
-return (char *)node_key(node) + node_ks(node);
+return ptr_disp(node_key(node), node_ks(node));
 }
 /* Size of a node in a leaf page with a given key and data.
@@ -653,7 +653,7 @@ pgno2bytes(const MDBX_env *env, size_t pgno) {
 MDBX_NOTHROW_PURE_FUNCTION static __always_inline MDBX_page *
 pgno2page(const MDBX_env *env, size_t pgno) {
-return (MDBX_page *)(env->me_map + pgno2bytes(env, pgno));
+return ptr_disp(env->me_map, pgno2bytes(env, pgno));
 }
 MDBX_NOTHROW_PURE_FUNCTION static __always_inline pgno_t
@@ -680,7 +680,7 @@ bytes_align2os_bytes(const MDBX_env *env, size_t bytes) {
 /* Address of first usable data byte in a page, after the header */
 MDBX_NOTHROW_PURE_FUNCTION static __always_inline void *
 page_data(const MDBX_page *mp) {
-return (char *)mp + PAGEHDRSZ;
+return ptr_disp(mp, PAGEHDRSZ);
 }
 MDBX_NOTHROW_PURE_FUNCTION static __always_inline const MDBX_page *
@@ -799,7 +799,7 @@ page_node(const MDBX_page *mp, size_t i) {
 assert(PAGETYPE_COMPAT(mp) == P_LEAF || PAGETYPE_WHOLE(mp) == P_BRANCH);
 assert(page_numkeys(mp) > i);
 assert(mp->mp_ptrs[i] % 2 == 0);
-return (MDBX_node *)((char *)mp + mp->mp_ptrs[i] + PAGEHDRSZ);
+return ptr_disp(mp, mp->mp_ptrs[i] + PAGEHDRSZ);
 }
 /* The address of a key in a LEAF2 page.
@@ -810,7 +810,7 @@ page_leaf2key(const MDBX_page *mp, size_t i, size_t keysize) {
 assert(PAGETYPE_COMPAT(mp) == (P_LEAF | P_LEAF2));
 assert(mp->mp_leaf2_ksize == keysize);
 (void)keysize;
-return (char *)mp + PAGEHDRSZ + (i * mp->mp_leaf2_ksize);
+return ptr_disp(mp, PAGEHDRSZ + i * mp->mp_leaf2_ksize);
 }
 /* Set the node's key into keyptr. */
@@ -2114,7 +2114,7 @@ static int lcklist_detach_locked(MDBX_env *env) {
 } while (++r != end); \
 \
 if (unlikely(key_diff_mask < 256)) { \
-memcpy(begin, tmp, (char *)end - (char *)begin); \
+memcpy(begin, tmp, ptr_dist(end, begin)); \
 break; \
 } \
 end = (r = tmp) + length; \
@@ -3698,7 +3698,7 @@ static MDBX_page *page_malloc(MDBX_txn *txn, size_t num) {
 size_t skip = PAGEHDRSZ;
 if (num > 1)
 skip += pgno2bytes(env, num - 1);
-memset((char *)np + skip, 0, size - skip);
+memset(ptr_disp(np, skip), 0, size - skip);
 }
 #if MDBX_DEBUG
 np->mp_pgno = 0;
@@ -4064,7 +4064,7 @@ __cold static void kill_page(MDBX_txn *txn, MDBX_page *mp, pgno_t pgno,
 } else {
 struct iovec iov[MDBX_AUXILARY_IOV_MAX];
 iov[0].iov_len = env->me_psize;
-iov[0].iov_base = (char *)env->me_pbuf + env->me_psize;
+iov[0].iov_base = ptr_disp(env->me_pbuf, env->me_psize);
 size_t iov_off = pgno2bytes(env, pgno), n = 1;
 while (--npages) {
 iov[n] = iov[0];
@@ -4475,10 +4475,10 @@ static void iov_callback4dirtypages(iov_ctx_t *ctx, size_t offset, void *data,
 eASSERT(env, (wp->mp_flags & P_ILL_BITS) == 0);
 if (likely(ctx->err == MDBX_SUCCESS)) {
-VALGRIND_MAKE_MEM_DEFINED(env->me_map + offset, bytes);
-MDBX_ASAN_UNPOISON_MEMORY_REGION(env->me_map + offset, bytes);
-osal_flush_incoherent_mmap(env->me_map + offset, bytes, env->me_os_psize);
-const MDBX_page *const rp = (const MDBX_page *)(env->me_map + offset);
+const MDBX_page *const rp = ptr_disp(env->me_map, offset);
+VALGRIND_MAKE_MEM_DEFINED(rp, bytes);
+MDBX_ASAN_UNPOISON_MEMORY_REGION(rp, bytes);
+osal_flush_incoherent_mmap(rp, bytes, env->me_os_psize);
 /* check with timeout as the workaround
 * for https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
 if (unlikely(memcmp(wp, rp, bytes))) {
@@ -4504,7 +4504,7 @@ static void iov_callback4dirtypages(iov_ctx_t *ctx, size_t offset, void *data,
 size_t npages = IS_OVERFLOW(wp) ? wp->mp_pages : 1u;
 size_t chunk = pgno2bytes(env, npages);
 eASSERT(env, bytes >= chunk);
-MDBX_page *next = (MDBX_page *)((char *)wp + chunk);
+MDBX_page *next = ptr_disp(wp, chunk);
 dpage_free(env, wp, npages);
 wp = next;
 offset += chunk;
@@ -5640,16 +5640,15 @@ __cold static int set_readahead(const MDBX_env *env, const pgno_t edge,
 #endif /* F_RDAHEAD */
 int err;
+void *const ptr = ptr_disp(env->me_map, offset);
 if (enable) {
 #if defined(MADV_NORMAL)
-err = madvise(env->me_map + offset, length, MADV_NORMAL)
-? ignore_enosys(errno)
-: MDBX_SUCCESS;
+err =
+madvise(ptr, length, MADV_NORMAL) ? ignore_enosys(errno) : MDBX_SUCCESS;
 if (unlikely(MDBX_IS_ERROR(err)))
 return err;
 #elif defined(POSIX_MADV_NORMAL)
-err = ignore_enosys(
-posix_madvise(env->me_map + offset, length, POSIX_MADV_NORMAL));
+err = ignore_enosys(posix_madvise(ptr, length, POSIX_MADV_NORMAL));
 if (unlikely(MDBX_IS_ERROR(err)))
 return err;
 #elif defined(POSIX_FADV_NORMAL) && defined(POSIX_FADV_WILLNEED)
@@ -5678,20 +5677,18 @@ __cold static int set_readahead(const MDBX_env *env, const pgno_t edge,
 (void)/* Ignore ENOTTY for DB on the ram-disk and so on */ fcntl(
 env->me_lazy_fd, F_RDADVISE, &hint);
 #elif defined(MADV_WILLNEED)
-err = madvise(env->me_map + offset, length, MADV_WILLNEED)
-? ignore_enosys(errno)
-: MDBX_SUCCESS;
+err = madvise(ptr, length, MADV_WILLNEED) ? ignore_enosys(errno)
+: MDBX_SUCCESS;
 if (unlikely(MDBX_IS_ERROR(err)))
 return err;
 #elif defined(POSIX_MADV_WILLNEED)
-err = ignore_enosys(
-posix_madvise(env->me_map + offset, length, POSIX_MADV_WILLNEED));
+err = ignore_enosys(posix_madvise(ptr, length, POSIX_MADV_WILLNEED));
 if (unlikely(MDBX_IS_ERROR(err)))
 return err;
 #elif defined(_WIN32) || defined(_WIN64)
 if (mdbx_PrefetchVirtualMemory) {
 WIN32_MEMORY_RANGE_ENTRY hint;
-hint.VirtualAddress = env->me_map + offset;
+hint.VirtualAddress = ptr;
 hint.NumberOfBytes = length;
 (void)mdbx_PrefetchVirtualMemory(GetCurrentProcess(), 1, &hint, 0);
 }
@@ -5706,14 +5703,12 @@ __cold static int set_readahead(const MDBX_env *env, const pgno_t edge,
 }
 } else {
 #if defined(MADV_RANDOM)
-err = madvise(env->me_map + offset, length, MADV_RANDOM)
-? ignore_enosys(errno)
-: MDBX_SUCCESS;
+err =
+madvise(ptr, length, MADV_RANDOM) ? ignore_enosys(errno) : MDBX_SUCCESS;
 if (unlikely(MDBX_IS_ERROR(err)))
 return err;
 #elif defined(POSIX_MADV_RANDOM)
-err = ignore_enosys(
-posix_madvise(env->me_map + offset, length, POSIX_MADV_RANDOM));
+err = ignore_enosys(posix_madvise(ptr, length, POSIX_MADV_RANDOM));
 if (unlikely(MDBX_IS_ERROR(err)))
 return err;
 #elif defined(POSIX_FADV_RANDOM)
@@ -5787,14 +5782,15 @@ __cold static void munlock_after(const MDBX_env *env, const pgno_t aligned_pgno,
 munlock_begin % env->me_os_psize == 0 &&
 munlock_size % env->me_os_psize == 0);
 #if defined(_WIN32) || defined(_WIN64)
-err = VirtualUnlock(env->me_map + munlock_begin, munlock_size)
+err = VirtualUnlock(ptr_disp(env->me_map, munlock_begin), munlock_size)
 ? MDBX_SUCCESS
 : (int)GetLastError();
 if (err == ERROR_NOT_LOCKED)
 err = MDBX_SUCCESS;
 #elif defined(_POSIX_MEMLOCK_RANGE)
-err = munlock(env->me_map + munlock_begin, munlock_size) ? errno
-: MDBX_SUCCESS;
+err = munlock(ptr_disp(env->me_map, munlock_begin), munlock_size)
+? errno
+: MDBX_SUCCESS;
 #endif
 if (likely(err == MDBX_SUCCESS))
 update_mlcnt(env, aligned_pgno, false);
@@ -5821,7 +5817,7 @@ __cold static int map_resize(MDBX_env *env, const pgno_t used_pgno,
 const size_t prev_size = env->me_dxb_mmap.current;
 const size_t prev_limit = env->me_dxb_mmap.limit;
 #if MDBX_ENABLE_MADVISE || defined(MDBX_USE_VALGRIND)
-const void *const prev_addr = env->me_map;
+const void *const prev_map = env->me_dxb_mmap.base;
 #endif /* MDBX_ENABLE_MADVISE || MDBX_USE_VALGRIND */
 VERBOSE("resize datafile/mapping: "
@@ -5932,20 +5928,20 @@ __cold static int map_resize(MDBX_env *env, const pgno_t used_pgno,
 rc = MDBX_RESULT_TRUE;
 #if defined(MADV_REMOVE)
 if (env->me_flags & MDBX_WRITEMAP)
-rc =
-madvise(env->me_map + size_bytes, prev_size - size_bytes, MADV_REMOVE)
+rc = madvise(ptr_disp(env->me_map, size_bytes), prev_size - size_bytes,
+MADV_REMOVE)
 ? ignore_enosys(errno)
 : MDBX_SUCCESS;
 #endif /* MADV_REMOVE */
 #if defined(MADV_DONTNEED)
 if (rc == MDBX_RESULT_TRUE)
-rc = madvise(env->me_map + size_bytes, prev_size - size_bytes,
+rc = madvise(ptr_disp(env->me_map, size_bytes), prev_size - size_bytes,
 MADV_DONTNEED)
 ? ignore_enosys(errno)
 : MDBX_SUCCESS;
 #elif defined(POSIX_MADV_DONTNEED)
 if (rc == MDBX_RESULT_TRUE)
-rc = ignore_enosys(posix_madvise(env->me_map + size_bytes,
+rc = ignore_enosys(posix_madvise(ptr_disp(env->me_map, size_bytes),
 prev_size - size_bytes,
 POSIX_MADV_DONTNEED));
 #elif defined(POSIX_FADV_DONTNEED)
@@ -5988,7 +5984,7 @@ __cold static int map_resize(MDBX_env *env, const pgno_t used_pgno,
 !(env->me_flags & MDBX_NORDAHEAD) &&
 mdbx_is_readahead_reasonable(size_bytes, -(intptr_t)prev_size);
 const bool force = limit_bytes != prev_limit ||
-env->me_dxb_mmap.address != prev_addr
+env->me_dxb_mmap.base != prev_map
 #if defined(_WIN32) || defined(_WIN64)
 || prev_size > size_bytes
 #endif /* Windows */
@@ -6003,7 +5999,7 @@ bailout:
 eASSERT(env, size_bytes <= env->me_dxb_mmap.filesize);
 eASSERT(env, size_bytes == env->me_dxb_mmap.current);
 #ifdef MDBX_USE_VALGRIND
-if (prev_limit != env->me_dxb_mmap.limit || prev_addr != env->me_map) {
+if (prev_limit != env->me_dxb_mmap.limit || prev_map != env->me_map) {
 VALGRIND_DISCARD(env->me_valgrind_handle);
 env->me_valgrind_handle = 0;
 if (env->me_dxb_mmap.limit)
@@ -6023,7 +6019,7 @@ bailout:
 "limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d",
 prev_size, size_bytes, prev_limit, limit_bytes, rc);
 }
-if (!env->me_dxb_mmap.address) {
+if (!env->me_dxb_mmap.base) {
 env->me_flags |= MDBX_FATAL_ERROR;
 if (env->me_txn)
 env->me_txn->mt_flags |= MDBX_TXN_ERROR;
@@ -6079,7 +6075,7 @@ static int meta_unsteady(int err, MDBX_env *env, const txnid_t early_than,
 const uint64_t wipe = MDBX_DATASIGN_NONE;
 const void *ptr = &wipe;
 size_t bytes = sizeof(meta->mm_sign),
-offset = (uint8_t *)&meta->mm_sign - env->me_map;
+offset = ptr_dist(&meta->mm_sign, env->me_map);
 if (env->me_flags & MDBX_WRITEMAP) {
 unaligned_poke_u64(4, meta->mm_sign, wipe);
 osal_flush_incoherent_cpu_writeback();
@@ -6093,7 +6089,7 @@ static int meta_unsteady(int err, MDBX_env *env, const txnid_t early_than,
 return err;
 }
 ptr = data_page(meta);
-offset = (uint8_t *)ptr - env->me_map;
+offset = ptr_dist(ptr, env->me_map);
 bytes = env->me_psize;
 }
@@ -6517,8 +6513,7 @@ __hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len,
 #ifndef __SANITIZE_ADDRESS__
 found:
 #endif /* __SANITIZE_ADDRESS__ */
-return (pgno_t *)((char *)range -
-(__builtin_clzl(mask) >> sizeof(size_t) / 4));
+return ptr_disp(range, -(__builtin_clzl(mask) >> sizeof(size_t) / 4));
 }
 range -= 4;
 } while (range > detent + 3);
@@ -6796,8 +6791,7 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
 flags += MDBX_ALLOC_COALESCE;
 }
-MDBX_cursor *const gc =
-(MDBX_cursor *)((char *)env->me_txn0 + sizeof(MDBX_txn));
+MDBX_cursor *const gc = ptr_disp(env->me_txn0, sizeof(MDBX_txn));
 eASSERT(env, mc != gc && gc->mc_next == nullptr);
 gc->mc_txn = txn;
 gc->mc_flags = 0;
@@ -7339,8 +7333,8 @@ __hot static void page_copy(MDBX_page *const dst, const MDBX_page *const src,
 const size_t size) {
 STATIC_ASSERT(UINT16_MAX > MAX_PAGESIZE - PAGEHDRSZ);
 STATIC_ASSERT(MIN_PAGESIZE > PAGEHDRSZ + NODESIZE * 4);
-char *copy_dst = (void *)dst;
-const char *copy_src = (const void *)src;
+void *copy_dst = dst;
+const void *copy_src = src;
 size_t copy_len = size;
 if (src->mp_flags & P_LEAF2) {
 copy_len = PAGEHDRSZ + src->mp_leaf2_ksize * page_numkeys(src);
@@ -7358,8 +7352,8 @@ __hot static void page_copy(MDBX_page *const dst, const MDBX_page *const src,
 if (unlikely(upper > copy_len))
 goto bailout;
 memcpy(copy_dst, copy_src, lower);
-copy_dst += upper;
-copy_src += upper;
+copy_dst = ptr_disp(copy_dst, upper);
+copy_src = ptr_disp(copy_src, upper);
 copy_len -= upper;
 }
 }
@@ -7585,7 +7579,7 @@ static int meta_sync(const MDBX_env *env, const meta_ptr_t head) {
 #endif /* MDBX_ENABLE_PGOP_STAT */
 const MDBX_page *page = data_page(head.ptr_c);
 rc = osal_pwrite(env->me_fd4meta, page, env->me_psize,
-(uint8_t *)page - env->me_map);
+ptr_dist(page, env->me_map));
 if (likely(rc == MDBX_SUCCESS) && env->me_fd4meta == env->me_lazy_fd) {
 rc = osal_fsync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
@@ -7947,10 +7941,11 @@ static void txn_valgrind(MDBX_env *env, MDBX_txn *txn) {
 if (edge > last) {
 eASSERT(env, last >= NUM_METAS);
 env->me_poison_edge = last;
-VALGRIND_MAKE_MEM_NOACCESS(env->me_map + pgno2bytes(env, last),
+VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->me_map, pgno2bytes(env, last)),
 pgno2bytes(env, edge - last));
-MDBX_ASAN_POISON_MEMORY_REGION(env->me_map + pgno2bytes(env, last),
-pgno2bytes(env, edge - last));
+MDBX_ASAN_POISON_MEMORY_REGION(
+ptr_disp(env->me_map, pgno2bytes(env, last)),
+pgno2bytes(env, edge - last));
 }
 if (should_unlock)
 mdbx_txn_unlock(env);
@@ -8221,7 +8216,7 @@ static int coherency_check_written(const MDBX_env *env, const txnid_t txnid,
 if (report)
 WARNING("catch %s txnid %" PRIaTXN " for meta_%" PRIaPGNO " %s",
 (head_txnid < MIN_TXNID) ? "invalid" : "unexpected", head_txnid,
-bytes2pgno(env, (const uint8_t *)meta - env->me_dxb_mmap.dxb),
+bytes2pgno(env, ptr_dist(meta, env->me_map)),
 "(workaround for incoherent flaw of unified page/buffer cache)");
 return coherency_timeout(timestamp, 0);
 }
@@ -8548,7 +8543,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
 }
 tASSERT(txn, txn == env->me_txn0);
-MDBX_cursor *const gc = (MDBX_cursor *)((char *)txn + sizeof(MDBX_txn));
+MDBX_cursor *const gc = ptr_disp(txn, sizeof(MDBX_txn));
 rc = cursor_init(gc, txn, FREE_DBI);
 if (rc != MDBX_SUCCESS)
 goto bailout;
@@ -8718,9 +8713,9 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags,
 #endif /* MDBX_DEBUG */
 memset(txn, 0, tsize);
 txn->mt_dbxs = env->me_dbxs; /* static */
-txn->mt_dbs = (MDBX_db *)((char *)txn + tsize);
-txn->mt_cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs);
-txn->mt_dbistate = (uint8_t *)txn + size - env->me_maxdbs;
+txn->mt_dbs = ptr_disp(txn, tsize);
+txn->mt_cursors = ptr_disp(txn->mt_dbs, sizeof(MDBX_db) * env->me_maxdbs);
+txn->mt_dbistate = ptr_disp(txn, size - env->me_maxdbs);
 txn->mt_flags = flags;
 txn->mt_env = env;
@@ -9340,7 +9335,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) {
 "the parent",
 rc);
 parent->mt_flags |= MDBX_TXN_ERROR;
-if (!env->me_dxb_mmap.address)
+if (!env->me_dxb_mmap.base)
 env->me_flags |= MDBX_FATAL_ERROR;
 }
 }
@@ -9558,7 +9553,7 @@ static __always_inline size_t gcu_backlog_size(MDBX_txn *txn) {
 static int gcu_clean_stored_retired(MDBX_txn *txn, gcu_context_t *ctx) {
 int err = MDBX_SUCCESS;
 if (ctx->retired_stored) {
-MDBX_cursor *const gc = (MDBX_cursor *)((char *)txn + sizeof(MDBX_txn));
+MDBX_cursor *const gc = ptr_disp(txn, sizeof(MDBX_txn));
 tASSERT(txn, txn == txn->mt_env->me_txn0 && gc->mc_next == nullptr);
 gc->mc_txn = txn;
 gc->mc_flags = 0;
@@ -11584,7 +11579,7 @@ static int validate_meta_copy(MDBX_env *env, const MDBX_meta *meta,
 MDBX_meta *dest) {
 *dest = *meta;
 return validate_meta(env, dest, data_page(meta),
-bytes2pgno(env, (uint8_t *)meta - env->me_map), nullptr);
+bytes2pgno(env, ptr_dist(meta, env->me_map)), nullptr);
 }
 /* Read the environment parameters of a DB environment
@@ -11753,7 +11748,7 @@ __cold static MDBX_page *meta_model(const MDBX_env *env, MDBX_page *model,
 meta_set_txnid(env, model_meta, MIN_TXNID + num);
 unaligned_poke_u64(4, model_meta->mm_sign, meta_sign(model_meta));
 eASSERT(env, coherency_check_meta(env, model_meta, true));
-return (MDBX_page *)((uint8_t *)model + env->me_psize);
+return ptr_disp(model, env->me_psize);
 }
 /* Fill in most of the zeroed meta-pages for an empty database environment.
@@ -11823,11 +11818,12 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending,
 const pgno_t edge = env->me_poison_edge;
 if (edge > largest_pgno) {
 env->me_poison_edge = largest_pgno;
-VALGRIND_MAKE_MEM_NOACCESS(env->me_map + pgno2bytes(env, largest_pgno),
-pgno2bytes(env, edge - largest_pgno));
-MDBX_ASAN_POISON_MEMORY_REGION(env->me_map +
-pgno2bytes(env, largest_pgno),
-pgno2bytes(env, edge - largest_pgno));
+VALGRIND_MAKE_MEM_NOACCESS(
+ptr_disp(env->me_map, pgno2bytes(env, largest_pgno)),
+pgno2bytes(env, edge - largest_pgno));
+MDBX_ASAN_POISON_MEMORY_REGION(
+ptr_disp(env->me_map, pgno2bytes(env, largest_pgno)),
+pgno2bytes(env, edge - largest_pgno));
 }
 #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */
 #if MDBX_ENABLE_MADVISE && \
@@ -11863,13 +11859,13 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending,
 if ((env->me_flags & MDBX_WRITEMAP) && linux_kernel_version > 0x04050000)
 advise = MADV_FREE;
 #endif /* MADV_FREE */
-int err = madvise(env->me_map + discard_edge_bytes,
+int err = madvise(ptr_disp(env->me_map, discard_edge_bytes),
 prev_discarded_bytes - discard_edge_bytes, advise)
 ? ignore_enosys(errno)
 : MDBX_SUCCESS;
 #else
 int err = ignore_enosys(posix_madvise(
-env->me_map + discard_edge_bytes,
+ptr_disp(env->me_map, discard_edge_bytes),
 prev_discarded_bytes - discard_edge_bytes, POSIX_MADV_DONTNEED));
 #endif
 if (unlikely(MDBX_IS_ERROR(err))) {
@@ -12060,9 +12056,9 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending,
 #ifndef NDEBUG
 /* debug: provoke failure to catch a violators, but don't touch mm_psize
 * to allow readers catch actual pagesize. */
-uint8_t *provoke_begin = (uint8_t *)&target->mm_dbs[FREE_DBI].md_root;
-uint8_t *provoke_end = (uint8_t *)&target->mm_sign;
-memset(provoke_begin, 0xCC, provoke_end - provoke_begin);
+void *provoke_begin = &target->mm_dbs[FREE_DBI].md_root;
+void *provoke_end = &target->mm_sign;
+memset(provoke_begin, 0xCC, ptr_dist(provoke_end, provoke_begin));
 jitter4testing(false);
 #endif
@@ -12102,7 +12098,7 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending,
 #endif /* MDBX_ENABLE_PGOP_STAT */
 const MDBX_page *page = data_page(target);
 rc = osal_pwrite(env->me_fd4meta, page, env->me_psize,
-(uint8_t *)page - env->me_map);
+ptr_dist(page, env->me_map));
 if (likely(rc == MDBX_SUCCESS)) {
 osal_flush_incoherent_mmap(target, sizeof(MDBX_meta), env->me_os_psize);
 if ((flags & MDBX_NOMETASYNC) == 0 &&
@@ -12122,14 +12118,14 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending,
 #endif /* MDBX_ENABLE_PGOP_STAT */
 const MDBX_meta undo_meta = *target;
 rc = osal_pwrite(env->me_fd4meta, pending, sizeof(MDBX_meta),
-(uint8_t *)target - env->me_map);
+ptr_dist(target, env->me_map));
 if (unlikely(rc != MDBX_SUCCESS)) {
 undo:
 DEBUG("%s", "write failed, disk error?");
 /* On a failure, the pagecache still contains the new data.
 * Try write some old data back, to prevent it from being used. */
 osal_pwrite(env->me_fd4meta, &undo_meta, sizeof(MDBX_meta),
-(uint8_t *)target - env->me_map);
+ptr_dist(target, env->me_map));
 goto fail;
 }
 osal_flush_incoherent_mmap(target, sizeof(MDBX_meta), env->me_os_psize);
@@ -13028,9 +13024,9 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc,
 #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
 if (env->me_dxb_mmap.filesize > used_bytes &&
 env->me_dxb_mmap.filesize < env->me_dxb_mmap.limit) {
-VALGRIND_MAKE_MEM_NOACCESS(env->me_map + used_bytes,
+VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->me_map, used_bytes),
 env->me_dxb_mmap.filesize - used_bytes);
-MDBX_ASAN_POISON_MEMORY_REGION(env->me_map + used_bytes,
+MDBX_ASAN_POISON_MEMORY_REGION(ptr_disp(env->me_map, used_bytes),
 env->me_dxb_mmap.filesize - used_bytes);
 }
 env->me_poison_edge =
@@ -13052,7 +13048,7 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc,
 err = validate_meta_copy(env, target, &clone);
 if (unlikely(err != MDBX_SUCCESS)) {
 ERROR("target meta[%u] is corrupted",
-bytes2pgno(env, (uint8_t *)data_page(target) - env->me_map));
+bytes2pgno(env, ptr_dist(data_page(target), env->me_map)));
 meta_troika_dump(env, &troika);
 return MDBX_CORRUPTED;
 }
@@ -13094,7 +13090,7 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc,
 err = validate_meta_copy(env, prefer_steady.ptr_c, &clone);
 if (unlikely(err != MDBX_SUCCESS)) {
 ERROR("meta[%u] with %s txnid %" PRIaTXN " is corrupted, %s needed",
-bytes2pgno(env, (uint8_t *)prefer_steady.ptr_c - env->me_map),
+bytes2pgno(env, ptr_dist(prefer_steady.ptr_c, env->me_map)),
 "steady", prefer_steady.txnid, "manual recovery");
 meta_troika_dump(env, &troika);
 return MDBX_CORRUPTED;
@@ -13103,8 +13099,7 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc,
 break;
 }
-const pgno_t pgno =
-bytes2pgno(env, (uint8_t *)recent.ptr_c - env->me_map);
+const pgno_t pgno = bytes2pgno(env, ptr_dist(recent.ptr_c, env->me_map));
 const bool last_valid =
 validate_meta_copy(env, recent.ptr_c, &clone) == MDBX_SUCCESS;
 eASSERT(env,
@@ -13291,7 +13286,7 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc,
 env->me_lck->mti_discarded_tail.weak,
 bytes2pgno(env, env->me_dxb_mmap.current));
 err =
-madvise(env->me_map + used_aligned2os_bytes,
+madvise(ptr_disp(env->me_map, used_aligned2os_bytes),
 env->me_dxb_mmap.current - used_aligned2os_bytes, MADV_REMOVE)
 ? ignore_enosys(errno)
 : MDBX_SUCCESS;
@@ -13304,7 +13299,7 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc,
 env->me_lck->mti_discarded_tail.weak,
 bytes2pgno(env, env->me_dxb_mmap.current));
 err =
-madvise(env->me_map + used_aligned2os_bytes,
+madvise(ptr_disp(env->me_map, used_aligned2os_bytes),
 env->me_dxb_mmap.current - used_aligned2os_bytes, MADV_DONTNEED)
 ? ignore_enosys(errno)
 : MDBX_SUCCESS;
@@ -13312,7 +13307,7 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc,
 return err;
 #elif defined(POSIX_MADV_DONTNEED)
 err = ignore_enosys(posix_madvise(
-env->me_map + used_aligned2os_bytes,
+ptr_disp(env->me_map, used_aligned2os_bytes),
 env->me_dxb_mmap.current - used_aligned2os_bytes, POSIX_MADV_DONTNEED));
 if (unlikely(MDBX_IS_ERROR(err)))
 return err;
@@ -14331,11 +14326,13 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname,
 memset(env->me_pbuf, -1, env->me_psize * 2);
 MDBX_txn *txn = osal_calloc(1, size);
 if (txn) {
-txn->mt_dbs = (MDBX_db *)((char *)txn + tsize);
-txn->mt_cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs);
+txn->mt_dbs = ptr_disp(txn, tsize);
+txn->mt_cursors =
+ptr_disp(txn->mt_dbs, sizeof(MDBX_db) * env->me_maxdbs);
 txn->mt_dbiseqs =
-(MDBX_atomic_uint32_t *)(txn->mt_cursors + env->me_maxdbs);
-txn->mt_dbistate = (uint8_t *)(txn->mt_dbiseqs + env->me_maxdbs);
+ptr_disp(txn->mt_cursors, sizeof(MDBX_cursor *) * env->me_maxdbs);
+txn->mt_dbistate = ptr_disp(
+txn->mt_dbiseqs, sizeof(MDBX_atomic_uint32_t) * env->me_maxdbs);
 txn->mt_env = env;
 txn->mt_dbxs = env->me_dbxs;
 txn->mt_flags = MDBX_TXN_FINISHED;
@@ -14633,8 +14630,8 @@ __hot static int cmp_lexical(const MDBX_val *a, const MDBX_val *b) {
 __hot static int cmp_reverse(const MDBX_val *a, const MDBX_val *b) {
 const size_t shortest = (a->iov_len < b->iov_len) ? a->iov_len : b->iov_len;
 if (likely(shortest)) {
-const uint8_t *pa = (const uint8_t *)a->iov_base + a->iov_len;
-const uint8_t *pb = (const uint8_t *)b->iov_base + b->iov_len;
+const uint8_t *pa = ptr_disp(a->iov_base, a->iov_len);
+const uint8_t *pb = ptr_disp(b->iov_base, b->iov_len);
 const uint8_t *const end = pa - shortest;
 do {
 int diff = *--pa - *--pb;
@@ -14695,8 +14692,8 @@ __hot static struct node_result node_search(MDBX_cursor *mc,
 do {
 i = (low + high) >> 1;
 nodekey.iov_base = page_leaf2key(mp, i, nodekey.iov_len);
-cASSERT(mc, (char *)mp + mc->mc_txn->mt_env->me_psize >=
-(char *)nodekey.iov_base + nodekey.iov_len);
+cASSERT(mc, ptr_disp(mp, mc->mc_txn->mt_env->me_psize) >=
+ptr_disp(nodekey.iov_base, nodekey.iov_len));
 int cr = cmp(key, &nodekey);
 DEBUG("found leaf index %zu [%s], rc = %i", i, DKEY_DEBUG(&nodekey), cr);
 if (cr > 0)
@@ -14730,8 +14727,8 @@ __hot static struct node_result node_search(MDBX_cursor *mc,
 node = page_node(mp, i);
 nodekey.iov_len = node_ks(node);
 nodekey.iov_base = node_key(node);
-cASSERT(mc, (char *)mp + mc->mc_txn->mt_env->me_psize >=
-(char *)nodekey.iov_base + nodekey.iov_len);
+cASSERT(mc, ptr_disp(mp, mc->mc_txn->mt_env->me_psize) >=
+ptr_disp(nodekey.iov_base, nodekey.iov_len));
 int cr = cmp(key, &nodekey);
 if (IS_LEAF(mp))
 DEBUG("found leaf index %zu [%s], rc = %i", i, DKEY_DEBUG(&nodekey), cr);
@@ -16689,11 +16686,11 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
 } else {
 /* there's only a key anyway, so this is a no-op */
 if (IS_LEAF2(mc->mc_pg[mc->mc_top])) {
-char *ptr;
 size_t ksize = mc->mc_db->md_xsize;
 if (unlikely(key->iov_len != ksize))
 return MDBX_BAD_VALSIZE;
-ptr = page_leaf2key(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], ksize);
+void *ptr =
+page_leaf2key(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], ksize);
 memcpy(ptr, key->iov_base, ksize);
 fix_parent:
 /* if overwriting slot 0 of leaf, need to
@@ -16805,8 +16802,8 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
 } else {
 olddata.iov_len = node_ds(node);
 olddata.iov_base = node_data(node);
-cASSERT(mc, (char *)olddata.iov_base + olddata.iov_len <=
-(char *)(mc->mc_pg[mc->mc_top]) + env->me_psize);
+cASSERT(mc, ptr_disp(olddata.iov_base, olddata.iov_len) <=
+ptr_disp(mc->mc_pg[mc->mc_top], env->me_psize));
 /* DB has dups? */
 if (mc->mc_db->md_flags & MDBX_DUPSORT) {
@@ -16946,10 +16943,10 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
 memcpy(page_data(mp), page_data(fp),
 page_numkeys(fp) * fp->mp_leaf2_ksize);
 } else {
-memcpy((char *)mp + mp->mp_upper + PAGEHDRSZ,
-(char *)fp + fp->mp_upper + PAGEHDRSZ,
+memcpy(ptr_disp(mp, mp->mp_upper + PAGEHDRSZ),
+ptr_disp(fp, fp->mp_upper + PAGEHDRSZ),
 olddata.iov_len - fp->mp_upper - PAGEHDRSZ);
-memcpy((char *)(&mp->mp_ptrs), (char *)(&fp->mp_ptrs),
+memcpy(mp->mp_ptrs, fp->mp_ptrs,
 page_numkeys(fp) * sizeof(mp->mp_ptrs[0]));
 for (i = 0; i < page_numkeys(fp); i++) {
 cASSERT(mc, mp->mp_ptrs[i] + offset <= UINT16_MAX);
@@ -16988,8 +16985,8 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
 cASSERT(mc, key->iov_len < UINT16_MAX);
 node_set_ks(node, key->iov_len);
 memcpy(node_key(node), key->iov_base, key->iov_len);
-cASSERT(mc, (char *)node_key(node) + node_ds(node) <
-(char *)(mc->mc_pg[mc->mc_top]) + env->me_psize);
+cASSERT(mc, ptr_disp(node_key(node), node_ds(node)) <
+ptr_disp(mc->mc_pg[mc->mc_top], env->me_psize));
 goto fix_parent;
 }
@@ -17133,7 +17130,7 @@ new_sub:;
 /* let caller know how many succeeded, if any */
 data[1].iov_len = mcount;
 if (mcount < dcount) {
-data[0].iov_base = (char *)data[0].iov_base + data[0].iov_len;
+data[0].iov_base = ptr_disp(data[0].iov_base, data[0].iov_len);
 insert_key = insert_data = false;
 goto more;
 }
@@ -17350,12 +17347,12 @@ __hot static int __must_check_result node_add_leaf2(MDBX_cursor *mc,
 mp->mp_lower = (indx_t)lower;
 mp->mp_upper = (indx_t)upper;
-char *const ptr = page_leaf2key(mp, indx, ksize);
+void *const ptr = page_leaf2key(mp, indx, ksize);
 cASSERT(mc, nkeys >= indx);
 const size_t diff = nkeys - indx;
 if (likely(diff > 0))
 /* Move higher keys up one slot. */
-memmove(ptr + ksize, ptr, diff * ksize);
+memmove(ptr_disp(ptr, ksize), ptr, diff * ksize);
 /* insert new key */
 memcpy(ptr, key->iov_base, ksize);
 return MDBX_SUCCESS;
@@ -17515,9 +17512,9 @@ __hot static void node_del(MDBX_cursor *mc, size_t ksize) {
 if (IS_LEAF2(mp)) {
 cASSERT(mc, ksize >= sizeof(indx_t));
 size_t diff = nkeys - 1 - hole;
-char *base = page_leaf2key(mp, hole, ksize);
+void *const base = page_leaf2key(mp, hole, ksize);
 if (diff)
-memmove(base, base + ksize, diff * ksize);
+memmove(base, ptr_disp(base, ksize), diff * ksize);
 cASSERT(mc, mp->mp_lower >= sizeof(indx_t));
 mp->mp_lower -= sizeof(indx_t);
 cASSERT(mc, (size_t)UINT16_MAX - mp->mp_upper >= ksize - sizeof(indx_t));
@@ -17541,8 +17538,8 @@ __hot static void node_del(MDBX_cursor *mc, size_t ksize) {
 ? mp->mp_ptrs[r] + (indx_t)hole_size
 : mp->mp_ptrs[r];
-char *base = (char *)mp + mp->mp_upper + PAGEHDRSZ;
-memmove(base + hole_size, base, hole_offset - mp->mp_upper);
+void *const base = ptr_disp(mp, mp->mp_upper + PAGEHDRSZ);
+memmove(ptr_disp(base, hole_size), base, hole_offset - mp->mp_upper);
 cASSERT(mc, mp->mp_lower >= sizeof(indx_t));
 mp->mp_lower -= sizeof(indx_t);
@@ -17564,7 +17561,6 @@ __hot static void node_del(MDBX_cursor *mc, size_t ksize) {
 static void node_shrink(MDBX_page *mp, size_t indx) {
 MDBX_node *node;
 MDBX_page *sp, *xp;
-char *base;
 size_t nsize, delta, len, ptr;
 intptr_t i;
@@ -17582,7 +17578,7 @@ static void node_shrink(MDBX_page *mp, size_t indx) {
 assert(nsize % 1 == 0);
 len = nsize;
 } else {
-xp = (MDBX_page *)((char *)sp + delta); /* destination subpage */
+xp = ptr_disp(sp, delta); /* destination subpage */
 for (i = page_numkeys(sp); --i >= 0;) {
 assert(sp->mp_ptrs[i] >= delta);
 xp->mp_ptrs[i] = (indx_t)(sp->mp_ptrs[i] - delta);
@@ -17595,8 +17591,8 @@ static void node_shrink(MDBX_page *mp, size_t indx) {
 node_set_ds(node, nsize);
 /* Shift <lower nodes...initial part of subpage> upward */
-base = (char *)mp + mp->mp_upper + PAGEHDRSZ;
-memmove(base + delta, base, (char *)sp + len - base);
+void *const base = ptr_disp(mp, mp->mp_upper + PAGEHDRSZ);
+memmove(ptr_disp(base, delta), base, ptr_dist(sp, base) + len);
 ptr = mp->mp_ptrs[indx];
 for (i = page_numkeys(mp); --i >= 0;) {
@@ -18090,7 +18086,6 @@ int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) {
 static int update_key(MDBX_cursor *mc, const MDBX_val *key) {
 MDBX_page *mp;
 MDBX_node *node;
-char *base;
 size_t len;
 ptrdiff_t delta, ksize, oksize;
 intptr_t ptr, i, nkeys, indx;
@@ -18135,9 +18130,9 @@ static int update_key(MDBX_cursor *mc, const MDBX_val *key) {
 }
 }
-base = (char *)mp + mp->mp_upper + PAGEHDRSZ;
+void *const base = ptr_disp(mp, mp->mp_upper + PAGEHDRSZ);
 len = ptr - mp->mp_upper + NODESIZE;
-memmove(base - delta, base, len);
+memmove(ptr_disp(base, -delta), base, len);
 cASSERT(mc, mp->mp_upper >= delta);
 mp->mp_upper -= (indx_t)delta;
@@ -18500,7 +18495,7 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) {
 rc = node_add_leaf2(cdst, j++, &key);
 if (unlikely(rc != MDBX_SUCCESS))
 return rc;
-key.iov_base = (char *)key.iov_base + key.iov_len;
+key.iov_base = ptr_disp(key.iov_base, key.iov_len);
 } while (++i != src_nkeys);
 } else {
 MDBX_node *srcnode = page_node(psrc, 0);
@@ -18991,7 +18986,7 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
 rc = bad_page(mp, "invalid pgno (%u)\n", mp->mp_pgno);
 MDBX_env *const env = mc->mc_txn->mt_env;
-const ptrdiff_t offset = (uint8_t *)mp - env->me_dxb_mmap.dxb;
+const ptrdiff_t offset = ptr_dist(mp, env->me_map);
 unsigned flags_mask = P_ILL_BITS;
 unsigned flags_expected = 0;
 if (offset < 0 ||
@@ -19063,7 +19058,7 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
 rc = bad_page(mp, "invalid page lower(%u)/upper(%u) with limit %zu\n",
 mp->mp_lower, mp->mp_upper, page_space(env));
-char *const end_of_page = (char *)mp + env->me_psize;
+const char *const end_of_page = ptr_disp(mp, env->me_psize);
 const size_t nkeys = page_numkeys(mp);
 STATIC_ASSERT(P_BRANCH == 1);
 if (unlikely(nkeys <= (uint8_t)(mp->mp_flags & P_BRANCH))) {
@@ -19094,7 +19089,7 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
 MDBX_val here, prev = {0, 0};
 for (size_t i = 0; i < nkeys; ++i) {
 if (IS_LEAF2(mp)) {
-char *const key = page_leaf2key(mp, i, leaf2_ksize);
+const char *const key = page_leaf2key(mp, i, leaf2_ksize);
 if (unlikely(end_of_page < key + leaf2_ksize)) {
 rc = bad_page(mp, "leaf2-key beyond (%zu) page-end\n",
 key + leaf2_ksize - end_of_page);
@@ -19111,8 +19106,8 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
 mc->mc_dbx->md_klen_min = mc->mc_dbx->md_klen_max = leaf2_ksize;
 }
 if ((mc->mc_checking & CC_SKIPORD) == 0) {
+here.iov_base = (void *)key;
 here.iov_len = leaf2_ksize;
-here.iov_base = key;
 if (prev.iov_base && unlikely(mc->mc_dbx->md_cmp(&prev, &here) >= 0))
 rc = bad_page(mp, "leaf2-key #%zu wrong order (%s >= %s)\n", i,
 DKEY(&prev), DVAL(&here));
@@ -19120,7 +19115,7 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
 }
 } else {
 const MDBX_node *const node = page_node(mp, i);
-const char *node_end = (char *)node + NODESIZE;
+const char *const node_end = ptr_disp(node, NODESIZE);
 if (unlikely(node_end > end_of_page)) {
 rc = bad_page(mp, "node[%zu] (%zu) beyond page-end\n", i,
 node_end - end_of_page);
@@ -19129,7 +19124,7 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
 const size_t ksize = node_ks(node);
 if (unlikely(ksize > ksize_max))
 rc = bad_page(mp, "node[%zu] too long key (%zu)\n", i, ksize);
-char *key = node_key(node);
+const char *const key = node_key(node);
 if (unlikely(end_of_page < key + ksize)) {
 rc = bad_page(mp, "node[%zu] key (%zu) beyond page-end\n", i,
 key + ksize - end_of_page);
@@ -19142,7 +19137,7 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
 mp, "node[%zu] key size (%zu) <> min/max key-length (%zu/%zu)\n",
 i, ksize, mc->mc_dbx->md_klen_min, mc->mc_dbx->md_klen_max);
 if ((mc->mc_checking & CC_SKIPORD) == 0) {
-here.iov_base = key;
+here.iov_base = (void *)key;
 here.iov_len = ksize;
 if (prev.iov_base && unlikely(mc->mc_dbx->md_cmp(&prev, &here) >= 0))
 rc = bad_page(mp, "node[%zu] key wrong order (%s >= %s)\n", i,
@@ -19282,8 +19277,8 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
 for (int j = 0; j < nsubkeys; j++) {
 if (IS_LEAF2(sp)) {
 /* LEAF2 pages have no mp_ptrs[] or node headers */
-size_t sub_ksize = sp->mp_leaf2_ksize;
-char *sub_key = page_leaf2key(sp, j, sub_ksize);
+const size_t sub_ksize = sp->mp_leaf2_ksize;
+const char *const sub_key = page_leaf2key(sp, j, sub_ksize);
 if (unlikely(end_of_subpage < sub_key + sub_ksize)) {
 rc = bad_page(mp, "nested-leaf2-key beyond (%zu) nested-page\n",
 sub_key + sub_ksize - end_of_subpage);
@@ -19302,8 +19297,8 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
 mc->mc_dbx->md_vlen_min = mc->mc_dbx->md_vlen_max = sub_ksize;
 }
 if ((mc->mc_checking & CC_SKIPORD) == 0) {
+sub_here.iov_base = (void *)sub_key;
 sub_here.iov_len = sub_ksize;
-sub_here.iov_base = sub_key;
 if (sub_prev.iov_base &&
 unlikely(mc->mc_dbx->md_dcmp(&sub_prev, &sub_here) >= 0))
 rc = bad_page(mp,
@@ -19313,7 +19308,7 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
 }
 } else {
 const MDBX_node *const sub_node = page_node(sp, j);
-const char *sub_node_end = (char *)sub_node + NODESIZE;
+const char *const sub_node_end = ptr_disp(sub_node, NODESIZE);
 if (unlikely(sub_node_end > end_of_subpage)) {
 rc = bad_page(mp, "nested-node beyond (%zu) nested-page\n",
 end_of_subpage - sub_node_end);
@@ -19323,9 +19318,9 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
 rc = bad_page(mp, "nested-node invalid flags (%u)\n",
 node_flags(sub_node));
-size_t sub_ksize = node_ks(sub_node);
-char *sub_key = node_key(sub_node);
-size_t sub_dsize = node_ds(sub_node);
+const size_t sub_ksize = node_ks(sub_node);
+const char *const sub_key = node_key(sub_node);
+const size_t sub_dsize = node_ds(sub_node);
 /* char *sub_data = node_data(sub_node); */
 if (unlikely(sub_ksize < mc->mc_dbx->md_vlen_min ||
@@ -19336,8 +19331,8 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
 sub_ksize, mc->mc_dbx->md_vlen_min,
 mc->mc_dbx->md_vlen_max);
 if ((mc->mc_checking & CC_SKIPORD) == 0) {
+sub_here.iov_base = (void *)sub_key;
 sub_here.iov_len = sub_ksize;
-sub_here.iov_base = sub_key;
 if (sub_prev.iov_base &&
 unlikely(mc->mc_dbx->md_dcmp(&sub_prev, &sub_here) >= 0))
 rc = bad_page(mp,
@@ -19775,14 +19770,12 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
 TRACE("old-first-key is %s", DKEY_DEBUG(&sepkey));
 } else {
 if (IS_LEAF2(sister)) {
-char *split, *ins;
-size_t lsize, rsize, ksize;
 /* Move half of the keys to the right sibling */
 const intptr_t distance = mc->mc_ki[mc->mc_top] - split_indx;
-ksize = mc->mc_db->md_xsize;
-split = page_leaf2key(mp, split_indx, ksize);
-rsize = (nkeys - split_indx) * ksize;
-lsize = (nkeys - split_indx) * sizeof(indx_t);
+size_t ksize = mc->mc_db->md_xsize;
+void *const split = page_leaf2key(mp, split_indx, ksize);
+size_t rsize = (nkeys - split_indx) * ksize;
+size_t lsize = (nkeys - split_indx) * sizeof(indx_t);
 cASSERT(mc, mp->mp_lower >= lsize);
 mp->mp_lower -= (indx_t)lsize;
 cASSERT(mc, sister->mp_lower + lsize <= UINT16_MAX);
@@ -19795,10 +19788,11 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
 sepkey.iov_base = (newindx != split_indx) ? split : newkey->iov_base;
 if (distance < 0) {
 cASSERT(mc, ksize >= sizeof(indx_t));
-ins = page_leaf2key(mp, mc->mc_ki[mc->mc_top], ksize);
+void *const ins = page_leaf2key(mp, mc->mc_ki[mc->mc_top], ksize);
 memcpy(sister->mp_ptrs, split, rsize);
 sepkey.iov_base = sister->mp_ptrs;
-memmove(ins + ksize, ins, (split_indx - mc->mc_ki[mc->mc_top]) * ksize);
+memmove(ptr_disp(ins, ksize), ins,
+(split_indx - mc->mc_ki[mc->mc_top]) * ksize);
 memcpy(ins, newkey->iov_base, ksize);
 cASSERT(mc, UINT16_MAX - mp->mp_lower >= (int)sizeof(indx_t));
 mp->mp_lower += sizeof(indx_t);
@@ -19806,9 +19800,10 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
 mp->mp_upper -= (indx_t)(ksize - sizeof(indx_t));
 } else {
 memcpy(sister->mp_ptrs, split, distance * ksize);
-ins = page_leaf2key(sister, distance, ksize);
+void *const ins = page_leaf2key(sister, distance, ksize);
 memcpy(ins, newkey->iov_base, ksize);
-memcpy(ins + ksize, split + distance * ksize, rsize - distance * ksize);
+memcpy(ptr_disp(ins, ksize), ptr_disp(split, distance * ksize),
+rsize - distance * ksize);
 cASSERT(mc, UINT16_MAX - sister->mp_lower >= (int)sizeof(indx_t));
 sister->mp_lower += sizeof(indx_t);
 cASSERT(mc, sister->mp_upper >= ksize - sizeof(indx_t));
@ -19887,8 +19882,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
cASSERT(mc, i <= nkeys); cASSERT(mc, i <= nkeys);
size_t size = new_size; size_t size = new_size;
if (i != newindx) { if (i != newindx) {
MDBX_node *node = MDBX_node *node = ptr_disp(mp, tmp_ki_copy->mp_ptrs[i] + PAGEHDRSZ);
(MDBX_node *)((char *)mp + tmp_ki_copy->mp_ptrs[i] + PAGEHDRSZ);
size = NODESIZE + node_ks(node) + sizeof(indx_t); size = NODESIZE + node_ks(node) + sizeof(indx_t);
if (IS_LEAF(mp)) if (IS_LEAF(mp))
size += (node_flags(node) & F_BIGDATA) ? sizeof(pgno_t) size += (node_flags(node) & F_BIGDATA) ? sizeof(pgno_t)
@ -19924,8 +19918,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
sepkey = *newkey; sepkey = *newkey;
if (split_indx != newindx) { if (split_indx != newindx) {
MDBX_node *node = MDBX_node *node =
(MDBX_node *)((char *)mp + tmp_ki_copy->mp_ptrs[split_indx] + ptr_disp(mp, tmp_ki_copy->mp_ptrs[split_indx] + PAGEHDRSZ);
PAGEHDRSZ);
sepkey.iov_len = node_ks(node); sepkey.iov_len = node_ks(node);
sepkey.iov_base = node_key(node); sepkey.iov_base = node_key(node);
} }
@ -20082,8 +20075,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
/* Update index for the new key. */ /* Update index for the new key. */
mc->mc_ki[mc->mc_top] = (indx_t)n; mc->mc_ki[mc->mc_top] = (indx_t)n;
} else { } else {
MDBX_node *node = MDBX_node *node = ptr_disp(mp, tmp_ki_copy->mp_ptrs[i] + PAGEHDRSZ);
(MDBX_node *)((char *)mp + tmp_ki_copy->mp_ptrs[i] + PAGEHDRSZ);
rkey.iov_base = node_key(node); rkey.iov_base = node_key(node);
rkey.iov_len = node_ks(node); rkey.iov_len = node_ks(node);
if (IS_LEAF(mp)) { if (IS_LEAF(mp)) {
@ -20378,7 +20370,7 @@ static int compacting_put_bytes(mdbx_compacting_ctx *ctx, const void *src,
assert(pgno == 0 || bytes > PAGEHDRSZ); assert(pgno == 0 || bytes > PAGEHDRSZ);
while (bytes > 0) { while (bytes > 0) {
const size_t side = ctx->mc_head & 1; const size_t side = ctx->mc_head & 1;
const size_t left = (size_t)MDBX_ENVCOPY_WRITEBUF - ctx->mc_wlen[side]; const size_t left = MDBX_ENVCOPY_WRITEBUF - ctx->mc_wlen[side];
if (left < (pgno ? PAGEHDRSZ : 1)) { if (left < (pgno ? PAGEHDRSZ : 1)) {
int err = compacting_toggle_write_buffers(ctx); int err = compacting_toggle_write_buffers(ctx);
if (unlikely(err != MDBX_SUCCESS)) if (unlikely(err != MDBX_SUCCESS))
@ -20401,7 +20393,7 @@ static int compacting_put_bytes(mdbx_compacting_ctx *ctx, const void *src,
} }
pgno = 0; pgno = 0;
} }
src = (const char *)src + chunk; src = ptr_disp(src, chunk);
} else } else
memset(dst, 0, chunk); memset(dst, 0, chunk);
bytes -= chunk; bytes -= chunk;
@ -20434,8 +20426,7 @@ static int compacting_put_page(mdbx_compacting_ctx *ctx, const MDBX_page *mp,
if (unlikely(err != MDBX_SUCCESS)) if (unlikely(err != MDBX_SUCCESS))
return err; return err;
return compacting_put_bytes( return compacting_put_bytes(
ctx, (const char *)mp + ctx->mc_env->me_psize - tail_bytes, tail_bytes, 0, ctx, ptr_disp(mp, ctx->mc_env->me_psize - tail_bytes), tail_bytes, 0, 0);
0);
} }
__cold static int compacting_walk_tree(mdbx_compacting_ctx *ctx, __cold static int compacting_walk_tree(mdbx_compacting_ctx *ctx,
@ -20451,18 +20442,18 @@ __cold static int compacting_walk_tree(mdbx_compacting_ctx *ctx,
return rc; return rc;
/* Make cursor pages writable */ /* Make cursor pages writable */
char *const buf = osal_malloc(pgno2bytes(ctx->mc_env, mc->mc_snum)); void *const buf = osal_malloc(pgno2bytes(ctx->mc_env, mc->mc_snum));
if (buf == NULL) if (buf == NULL)
return MDBX_ENOMEM; return MDBX_ENOMEM;
char *ptr = buf; void *ptr = buf;
for (size_t i = 0; i < mc->mc_top; i++) { for (size_t i = 0; i < mc->mc_top; i++) {
page_copy((MDBX_page *)ptr, mc->mc_pg[i], ctx->mc_env->me_psize); page_copy(ptr, mc->mc_pg[i], ctx->mc_env->me_psize);
mc->mc_pg[i] = (MDBX_page *)ptr; mc->mc_pg[i] = ptr;
ptr += ctx->mc_env->me_psize; ptr = ptr_disp(ptr, ctx->mc_env->me_psize);
} }
/* This is writable space for a leaf page. Usually not needed. */ /* This is writable space for a leaf page. Usually not needed. */
MDBX_page *const leaf = (MDBX_page *)ptr; MDBX_page *const leaf = ptr;
while (mc->mc_snum > 0) { while (mc->mc_snum > 0) {
MDBX_page *mp = mc->mc_pg[mc->mc_top]; MDBX_page *mp = mc->mc_pg[mc->mc_top];
@ -20823,8 +20814,7 @@ __cold static int env_copy_asis(MDBX_env *env, MDBX_txn *read_txn,
* but writing ones after the data was flushed */ * but writing ones after the data was flushed */
memcpy(buffer, env->me_map, meta_bytes); memcpy(buffer, env->me_map, meta_bytes);
MDBX_meta *const headcopy = /* LY: get pointer to the snapshot copy */ MDBX_meta *const headcopy = /* LY: get pointer to the snapshot copy */
(MDBX_meta *)(buffer + ptr_disp(buffer, ptr_dist(meta_recent(env, &troika).ptr_c, env->me_map));
((uint8_t *)meta_recent(env, &troika).ptr_c - env->me_map));
mdbx_txn_unlock(env); mdbx_txn_unlock(env);
if (flags & MDBX_CP_FORCE_DYNAMIC_SIZE) if (flags & MDBX_CP_FORCE_DYNAMIC_SIZE)
@ -20899,7 +20889,7 @@ __cold static int env_copy_asis(MDBX_env *env, MDBX_txn *read_txn,
? (size_t)MDBX_ENVCOPY_WRITEBUF ? (size_t)MDBX_ENVCOPY_WRITEBUF
: used_size - offset; : used_size - offset;
/* copy to avoid EFAULT in case swapped-out */ /* copy to avoid EFAULT in case swapped-out */
memcpy(data_buffer, env->me_map + offset, chunk); memcpy(data_buffer, ptr_disp(env->me_map, offset), chunk);
rc = osal_write(fd, data_buffer, chunk); rc = osal_write(fd, data_buffer, chunk);
offset += chunk; offset += chunk;
} }
@ -21746,7 +21736,7 @@ static int dbi_open(MDBX_txn *txn, const char *table_name, unsigned user_flags,
} }
/* Done here so we cannot fail after creating a new DB */ /* Done here so we cannot fail after creating a new DB */
char *namedup = osal_strdup(table_name); char *const namedup = osal_strdup(table_name);
if (unlikely(!namedup)) { if (unlikely(!namedup)) {
rc = MDBX_ENOMEM; rc = MDBX_ENOMEM;
goto early_bailout; goto early_bailout;
@ -21888,7 +21878,7 @@ static int dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) {
if (unlikely(dbi >= env->me_numdbs)) if (unlikely(dbi >= env->me_numdbs))
return MDBX_BAD_DBI; return MDBX_BAD_DBI;
char *ptr = env->me_dbxs[dbi].md_name.iov_base; char *const ptr = env->me_dbxs[dbi].md_name.iov_base;
/* If there was no name, this was already closed */ /* If there was no name, this was already closed */
if (unlikely(!ptr)) if (unlikely(!ptr))
return MDBX_BAD_DBI; return MDBX_BAD_DBI;
@ -23481,7 +23471,7 @@ int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) {
return rc; return rc;
const MDBX_env *env = txn->mt_env; const MDBX_env *env = txn->mt_env;
const ptrdiff_t offset = (uint8_t *)ptr - env->me_map; const ptrdiff_t offset = ptr_dist(ptr, env->me_map);
if (offset >= 0) { if (offset >= 0) {
const pgno_t pgno = bytes2pgno(env, offset); const pgno_t pgno = bytes2pgno(env, offset);
if (likely(pgno < txn->mt_next_pgno)) { if (likely(pgno < txn->mt_next_pgno)) {
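
For illustration, ptr_dist() yields the signed byte offset of a pointer inside a mapped region, which can then be reduced to a page index much like mdbx_is_dirty() does above. The names below (page_index_of, map_base, map_pages) are made up for the sketch and are not libmdbx API:

#include <stddef.h>
#include <stdint.h>

#define ptr_dist(more, less) (((intptr_t)(more)) - ((intptr_t)(less)))

/* Illustrative only: zero-based page index of `ptr` within a mapping of
 * `map_pages` pages that starts at `map_base`, or -1 if it lies outside. */
static ptrdiff_t page_index_of(const void *map_base, size_t page_size,
                               size_t map_pages, const void *ptr) {
  const ptrdiff_t offset = ptr_dist(ptr, map_base);
  if (offset < 0 || (size_t)offset >= map_pages * page_size)
    return -1;
  return offset / (ptrdiff_t)page_size;
}
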

View File

@ -572,7 +572,14 @@ typedef struct MDBX_page {
/* Size of the page header, excluding dynamic data at the end */ /* Size of the page header, excluding dynamic data at the end */
#define PAGEHDRSZ offsetof(MDBX_page, mp_ptrs) #define PAGEHDRSZ offsetof(MDBX_page, mp_ptrs)
#define mp_next(mp) (*(MDBX_page **)((mp)->mp_ptrs + 2)) /* Pointer displacement without casting to char* to avoid pointer-aliasing */
#define ptr_disp(ptr, disp) ((void *)(((intptr_t)(ptr)) + ((intptr_t)(disp))))
/* Pointer distance as signed number of bytes */
#define ptr_dist(more, less) (((intptr_t)(more)) - ((intptr_t)(less)))
#define mp_next(mp) \
(*(MDBX_page **)ptr_disp((mp)->mp_ptrs, sizeof(void *) - sizeof(uint32_t)))
#pragma pack(pop) #pragma pack(pop)
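
For reference, the two new helpers boil down to integer arithmetic on addresses. A minimal standalone sketch (the macros are repeated here only to keep the example self-contained; the buffer and variable names are illustrative):

#include <assert.h>
#include <stdint.h>

/* Same definitions as introduced above, repeated for self-containment. */
#define ptr_disp(ptr, disp) ((void *)(((intptr_t)(ptr)) + ((intptr_t)(disp))))
#define ptr_dist(more, less) (((intptr_t)(more)) - ((intptr_t)(less)))

int main(void) {
  char page[4096];
  /* Displace a pointer by a byte offset without a (char *) cast chain. */
  void *payload = ptr_disp(page, 16);
  /* Signed distance in bytes between two pointers. */
  assert(ptr_dist(payload, page) == 16);
  assert(ptr_dist(page, payload) == -16);
  return 0;
}
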
@ -1204,7 +1211,7 @@ struct MDBX_env {
#define ENV_INTERNAL_FLAGS (MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE | MDBX_ENV_TXKEY) #define ENV_INTERNAL_FLAGS (MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE | MDBX_ENV_TXKEY)
uint32_t me_flags; uint32_t me_flags;
osal_mmap_t me_dxb_mmap; /* The main data file */ osal_mmap_t me_dxb_mmap; /* The main data file */
#define me_map me_dxb_mmap.dxb #define me_map me_dxb_mmap.base
#define me_lazy_fd me_dxb_mmap.fd #define me_lazy_fd me_dxb_mmap.fd
#define me_fd4data me_ioring.fd #define me_fd4data me_ioring.fd
mdbx_filehandle_t me_dsync_fd, me_fd4meta; mdbx_filehandle_t me_dsync_fd, me_fd4meta;
@ -1470,7 +1477,8 @@ MDBX_NORETURN __cold void assert_fail(const char *msg, const char *func,
#endif /* MDBX_CPU_WRITEBACK_INCOHERENT */ #endif /* MDBX_CPU_WRITEBACK_INCOHERENT */
MDBX_MAYBE_UNUSED static __inline void MDBX_MAYBE_UNUSED static __inline void
osal_flush_incoherent_mmap(void *addr, size_t nbytes, const intptr_t pagesize) { osal_flush_incoherent_mmap(const void *addr, size_t nbytes,
const intptr_t pagesize) {
#if MDBX_MMAP_INCOHERENT_FILE_WRITE #if MDBX_MMAP_INCOHERENT_FILE_WRITE
char *const begin = (char *)(-pagesize & (intptr_t)addr); char *const begin = (char *)(-pagesize & (intptr_t)addr);
char *const end = char *const end =
@ -1486,7 +1494,7 @@ osal_flush_incoherent_mmap(void *addr, size_t nbytes, const intptr_t pagesize) {
#ifdef DCACHE #ifdef DCACHE
/* MIPS has cache coherency issues. /* MIPS has cache coherency issues.
* Note: for any nbytes >= on-chip cache size, entire is flushed. */ * Note: for any nbytes >= on-chip cache size, entire is flushed. */
cacheflush(addr, nbytes, DCACHE); cacheflush((void *)addr, nbytes, DCACHE);
#else #else
#error "Oops, cacheflush() not available" #error "Oops, cacheflush() not available"
#endif /* DCACHE */ #endif /* DCACHE */

View File

@ -628,7 +628,7 @@ MDBX_INTERNAL_FUNC int osal_ioring_create(osal_ioring_t *ior,
assert(osal_iov_max > 0); assert(osal_iov_max > 0);
#endif /* MDBX_HAVE_PWRITEV && _SC_IOV_MAX */ #endif /* MDBX_HAVE_PWRITEV && _SC_IOV_MAX */
ior->boundary = (char *)(ior->pool + ior->allocated); ior->boundary = ptr_disp(ior->pool, ior->allocated);
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
@ -645,9 +645,9 @@ static __inline size_t ior_offset(const ior_item_t *item) {
static __inline ior_item_t *ior_next(ior_item_t *item, size_t sgvcnt) { static __inline ior_item_t *ior_next(ior_item_t *item, size_t sgvcnt) {
#if defined(ior_sgv_element) #if defined(ior_sgv_element)
assert(sgvcnt > 0); assert(sgvcnt > 0);
return (ior_item_t *)((char *)item + sizeof(ior_item_t) - return (ior_item_t *)ptr_disp(item, sizeof(ior_item_t) -
sizeof(ior_sgv_element) + sizeof(ior_sgv_element) +
sizeof(ior_sgv_element) * sgvcnt); sizeof(ior_sgv_element) * sgvcnt);
#else #else
assert(sgvcnt == 1); assert(sgvcnt == 1);
(void)sgvcnt; (void)sgvcnt;
@ -684,15 +684,15 @@ MDBX_INTERNAL_FUNC int osal_ioring_add(osal_ioring_t *ior, const size_t offset,
size_t i = 0; size_t i = 0;
do { do {
item->sgv[ior->last_sgvcnt + i].Buffer = PtrToPtr64(data); item->sgv[ior->last_sgvcnt + i].Buffer = PtrToPtr64(data);
data = (char *)data + ior->pagesize; data = ptr_disp(data, ior->pagesize);
} while (++i < segments); } while (++i < segments);
ior->slots_left -= segments; ior->slots_left -= segments;
item->sgv[ior->last_sgvcnt += segments].Buffer = 0; item->sgv[ior->last_sgvcnt += segments].Buffer = 0;
assert((item->single.iov_len & ior_WriteFile_flag) == 0); assert((item->single.iov_len & ior_WriteFile_flag) == 0);
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
const void *end = (char *)(item->single.iov_base) + item->single.iov_len - const void *end = ptr_disp(item->single.iov_base,
ior_WriteFile_flag; item->single.iov_len - ior_WriteFile_flag);
if (unlikely(end == data)) { if (unlikely(end == data)) {
assert((item->single.iov_len & ior_WriteFile_flag) != 0); assert((item->single.iov_len & ior_WriteFile_flag) != 0);
item->single.iov_len += bytes; item->single.iov_len += bytes;
@ -700,8 +700,8 @@ MDBX_INTERNAL_FUNC int osal_ioring_add(osal_ioring_t *ior, const size_t offset,
} }
#elif MDBX_HAVE_PWRITEV #elif MDBX_HAVE_PWRITEV
assert((int)item->sgvcnt > 0); assert((int)item->sgvcnt > 0);
const void *end = (char *)(item->sgv[item->sgvcnt - 1].iov_base) + const void *end = ptr_disp(item->sgv[item->sgvcnt - 1].iov_base,
item->sgv[item->sgvcnt - 1].iov_len; item->sgv[item->sgvcnt - 1].iov_len);
if (unlikely(end == data)) { if (unlikely(end == data)) {
item->sgv[item->sgvcnt - 1].iov_len += bytes; item->sgv[item->sgvcnt - 1].iov_len += bytes;
ior->last_bytes += bytes; ior->last_bytes += bytes;
@ -718,7 +718,7 @@ MDBX_INTERNAL_FUNC int osal_ioring_add(osal_ioring_t *ior, const size_t offset,
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
#else #else
const void *end = (char *)(item->single.iov_base) + item->single.iov_len; const void *end = ptr_disp(item->single.iov_base, item->single.iov_len);
if (unlikely(end == data)) { if (unlikely(end == data)) {
item->single.iov_len += bytes; item->single.iov_len += bytes;
return MDBX_SUCCESS; return MDBX_SUCCESS;
@ -747,7 +747,7 @@ MDBX_INTERNAL_FUNC int osal_ioring_add(osal_ioring_t *ior, const size_t offset,
/* WriteFileGather() */ /* WriteFileGather() */
item->sgv[0].Buffer = PtrToPtr64(data); item->sgv[0].Buffer = PtrToPtr64(data);
for (size_t i = 1; i < segments; ++i) { for (size_t i = 1; i < segments; ++i) {
data = (char *)data + ior->pagesize; data = ptr_disp(data, ior->pagesize);
item->sgv[slots_used].Buffer = PtrToPtr64(data); item->sgv[slots_used].Buffer = PtrToPtr64(data);
} }
item->sgv[slots_used].Buffer = 0; item->sgv[slots_used].Buffer = 0;
@ -1131,7 +1131,7 @@ MDBX_INTERNAL_FUNC int osal_ioring_resize(osal_ioring_t *ior, size_t items) {
memset(ior->pool + ior->allocated, 0, memset(ior->pool + ior->allocated, 0,
sizeof(ior_item_t) * (items - ior->allocated)); sizeof(ior_item_t) * (items - ior->allocated));
ior->allocated = (unsigned)items; ior->allocated = (unsigned)items;
ior->boundary = (char *)(ior->pool + ior->allocated); ior->boundary = ptr_disp(ior->pool, ior->allocated);
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
if (useSetFileIoOverlappedRange) { if (useSetFileIoOverlappedRange) {
if (mdbx_SetFileIoOverlappedRange(ior->fd, ptr, (ULONG)bytes)) if (mdbx_SetFileIoOverlappedRange(ior->fd, ptr, (ULONG)bytes))
@ -1474,7 +1474,7 @@ MDBX_INTERNAL_FUNC int osal_pwrite(mdbx_filehandle_t fd, const void *buf,
#endif #endif
bytes -= written; bytes -= written;
offset += written; offset += written;
buf = (char *)buf + written; buf = ptr_disp(buf, written);
} }
} }
@ -1504,7 +1504,7 @@ MDBX_INTERNAL_FUNC int osal_write(mdbx_filehandle_t fd, const void *buf,
} }
#endif #endif
bytes -= written; bytes -= written;
buf = (char *)buf + written; buf = ptr_disp(buf, written);
} }
} }
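
The partial-write loops above advance the source pointer with ptr_disp() until the kernel has accepted every byte. A minimal POSIX sketch of that idiom (write_all is a made-up name, not a libmdbx function):

#include <errno.h>
#include <stdint.h>
#include <unistd.h>

#define ptr_disp(ptr, disp) ((void *)(((intptr_t)(ptr)) + ((intptr_t)(disp))))

/* Retry write(2) until the whole buffer is flushed, stepping the source
 * pointer forward by however many bytes each call accepted. */
static int write_all(int fd, const void *buf, size_t bytes) {
  while (bytes > 0) {
    const ssize_t written = write(fd, buf, bytes);
    if (written < 0) {
      if (errno == EINTR)
        continue;
      return errno;
    }
    bytes -= (size_t)written;
    buf = ptr_disp(buf, written);
  }
  return 0;
}
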
@ -1701,7 +1701,7 @@ MDBX_INTERNAL_FUNC int osal_thread_join(osal_thread_t thread) {
MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset, MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset,
size_t length, size_t length,
enum osal_syncmode_bits mode_bits) { enum osal_syncmode_bits mode_bits) {
uint8_t *ptr = (uint8_t *)map->address + offset; void *ptr = ptr_disp(map->base, offset);
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
if (!FlushViewOfFile(ptr, length)) if (!FlushViewOfFile(ptr, length))
return (int)GetLastError(); return (int)GetLastError();
@ -2063,7 +2063,7 @@ MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map,
assert(size <= limit); assert(size <= limit);
map->limit = 0; map->limit = 0;
map->current = 0; map->current = 0;
map->address = nullptr; map->base = nullptr;
map->filesize = 0; map->filesize = 0;
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
map->section = NULL; map->section = NULL;
@ -2115,7 +2115,7 @@ MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map,
: mdbx_RunningUnderWine() ? size : mdbx_RunningUnderWine() ? size
: limit; : limit;
err = NtMapViewOfSection( err = NtMapViewOfSection(
map->section, GetCurrentProcess(), &map->address, map->section, GetCurrentProcess(), &map->base,
/* ZeroBits */ 0, /* ZeroBits */ 0,
/* CommitSize */ 0, /* CommitSize */ 0,
/* SectionOffset */ NULL, &ViewSize, /* SectionOffset */ NULL, &ViewSize,
@ -2126,10 +2126,10 @@ MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map,
if (!NT_SUCCESS(err)) { if (!NT_SUCCESS(err)) {
NtClose(map->section); NtClose(map->section);
map->section = 0; map->section = 0;
map->address = nullptr; map->base = nullptr;
return ntstatus2errcode(err); return ntstatus2errcode(err);
} }
assert(map->address != MAP_FAILED); assert(map->base != MAP_FAILED);
map->current = (size_t)SectionSize.QuadPart; map->current = (size_t)SectionSize.QuadPart;
map->limit = ViewSize; map->limit = ViewSize;
@ -2160,7 +2160,7 @@ MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map,
#define MAP_NORESERVE 0 #define MAP_NORESERVE 0
#endif #endif
map->address = mmap( map->base = mmap(
NULL, limit, (flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ, NULL, limit, (flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ,
MAP_SHARED | MAP_FILE | MAP_NORESERVE | MAP_SHARED | MAP_FILE | MAP_NORESERVE |
(F_ISSET(flags, MDBX_UTTERLY_NOSYNC) ? MAP_NOSYNC : 0) | (F_ISSET(flags, MDBX_UTTERLY_NOSYNC) ? MAP_NOSYNC : 0) |
@ -2168,10 +2168,10 @@ MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map,
: MAP_CONCEAL), : MAP_CONCEAL),
map->fd, 0); map->fd, 0);
if (unlikely(map->address == MAP_FAILED)) { if (unlikely(map->base == MAP_FAILED)) {
map->limit = 0; map->limit = 0;
map->current = 0; map->current = 0;
map->address = nullptr; map->base = nullptr;
assert(errno != 0); assert(errno != 0);
return errno; return errno;
} }
@ -2179,39 +2179,38 @@ MDBX_INTERNAL_FUNC int osal_mmap(const int flags, osal_mmap_t *map,
#if MDBX_ENABLE_MADVISE #if MDBX_ENABLE_MADVISE
#ifdef MADV_DONTFORK #ifdef MADV_DONTFORK
if (unlikely(madvise(map->address, map->limit, MADV_DONTFORK) != 0)) if (unlikely(madvise(map->base, map->limit, MADV_DONTFORK) != 0))
return errno; return errno;
#endif /* MADV_DONTFORK */ #endif /* MADV_DONTFORK */
#ifdef MADV_NOHUGEPAGE #ifdef MADV_NOHUGEPAGE
(void)madvise(map->address, map->limit, MADV_NOHUGEPAGE); (void)madvise(map->base, map->limit, MADV_NOHUGEPAGE);
#endif /* MADV_NOHUGEPAGE */ #endif /* MADV_NOHUGEPAGE */
#endif /* MDBX_ENABLE_MADVISE */ #endif /* MDBX_ENABLE_MADVISE */
#endif /* ! Windows */ #endif /* ! Windows */
VALGRIND_MAKE_MEM_DEFINED(map->address, map->current); VALGRIND_MAKE_MEM_DEFINED(map->base, map->current);
MDBX_ASAN_UNPOISON_MEMORY_REGION(map->address, map->current); MDBX_ASAN_UNPOISON_MEMORY_REGION(map->base, map->current);
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map) { MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map) {
VALGRIND_MAKE_MEM_NOACCESS(map->address, map->current); VALGRIND_MAKE_MEM_NOACCESS(map->base, map->current);
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic /* Unpoisoning is required for ASAN to avoid false-positive diagnostic
* when this memory will be re-used by malloc or another mmapping. * when this memory will be re-used by malloc or another mmapping.
* See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203 * See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
*/ */
MDBX_ASAN_UNPOISON_MEMORY_REGION(map->address, MDBX_ASAN_UNPOISON_MEMORY_REGION(
(map->filesize && map->filesize < map->limit) map->base, (map->filesize && map->filesize < map->limit) ? map->filesize
? map->filesize : map->limit);
: map->limit);
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
if (map->section) if (map->section)
NtClose(map->section); NtClose(map->section);
NTSTATUS rc = NtUnmapViewOfSection(GetCurrentProcess(), map->address); NTSTATUS rc = NtUnmapViewOfSection(GetCurrentProcess(), map->base);
if (!NT_SUCCESS(rc)) if (!NT_SUCCESS(rc))
ntstatus2errcode(rc); ntstatus2errcode(rc);
#else #else
if (unlikely(munmap(map->address, map->limit))) { if (unlikely(munmap(map->base, map->limit))) {
assert(errno != 0); assert(errno != 0);
return errno; return errno;
} }
@ -2219,7 +2218,7 @@ MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map) {
map->limit = 0; map->limit = 0;
map->current = 0; map->current = 0;
map->address = nullptr; map->base = nullptr;
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
@ -2252,7 +2251,7 @@ MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
return err; return err;
/* check ability of address space for growth before unmap */ /* check ability of address space for growth before unmap */
PVOID BaseAddress = (PBYTE)map->address + map->limit; PVOID BaseAddress = (PBYTE)map->base + map->limit;
SIZE_T RegionSize = limit - map->limit; SIZE_T RegionSize = limit - map->limit;
status = NtAllocateVirtualMemory(GetCurrentProcess(), &BaseAddress, 0, status = NtAllocateVirtualMemory(GetCurrentProcess(), &BaseAddress, 0,
&RegionSize, MEM_RESERVE, PAGE_NOACCESS); &RegionSize, MEM_RESERVE, PAGE_NOACCESS);
@ -2279,8 +2278,8 @@ MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
* when this memory will be re-used by malloc or another mmapping. * when this memory will be re-used by malloc or another mmapping.
* See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203 * See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
*/ */
MDBX_ASAN_UNPOISON_MEMORY_REGION(map->address, map->limit); MDBX_ASAN_UNPOISON_MEMORY_REGION(map->base, map->limit);
status = NtUnmapViewOfSection(GetCurrentProcess(), map->address); status = NtUnmapViewOfSection(GetCurrentProcess(), map->base);
if (!NT_SUCCESS(status)) if (!NT_SUCCESS(status))
return ntstatus2errcode(status); return ntstatus2errcode(status);
status = NtClose(map->section); status = NtClose(map->section);
@ -2292,7 +2291,7 @@ MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
bailout_ntstatus: bailout_ntstatus:
err = ntstatus2errcode(status); err = ntstatus2errcode(status);
bailout: bailout:
map->address = NULL; map->base = NULL;
map->current = map->limit = 0; map->current = map->limit = 0;
if (ReservedAddress) { if (ReservedAddress) {
ReservedSize = 0; ReservedSize = 0;
@ -2307,7 +2306,7 @@ MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
retry_file_and_section: retry_file_and_section:
/* resizing of the file may take a while, /* resizing of the file may take a while,
* therefore we reserve address space to avoid occupy it by other threads */ * therefore we reserve address space to avoid occupy it by other threads */
ReservedAddress = map->address; ReservedAddress = map->base;
status = NtAllocateVirtualMemory(GetCurrentProcess(), &ReservedAddress, 0, status = NtAllocateVirtualMemory(GetCurrentProcess(), &ReservedAddress, 0,
&ReservedSize, MEM_RESERVE, PAGE_NOACCESS); &ReservedSize, MEM_RESERVE, PAGE_NOACCESS);
if (!NT_SUCCESS(status)) { if (!NT_SUCCESS(status)) {
@ -2317,7 +2316,7 @@ retry_file_and_section:
if (flags & MDBX_MRESIZE_MAY_MOVE) if (flags & MDBX_MRESIZE_MAY_MOVE)
/* the base address could be changed */ /* the base address could be changed */
map->address = NULL; map->base = NULL;
} }
err = osal_filesize(map->fd, &map->filesize); err = osal_filesize(map->fd, &map->filesize);
@ -2362,7 +2361,7 @@ retry_file_and_section:
retry_mapview:; retry_mapview:;
SIZE_T ViewSize = (flags & MDBX_RDONLY) ? size : limit; SIZE_T ViewSize = (flags & MDBX_RDONLY) ? size : limit;
status = NtMapViewOfSection( status = NtMapViewOfSection(
map->section, GetCurrentProcess(), &map->address, map->section, GetCurrentProcess(), &map->base,
/* ZeroBits */ 0, /* ZeroBits */ 0,
/* CommitSize */ 0, /* CommitSize */ 0,
/* SectionOffset */ NULL, &ViewSize, /* SectionOffset */ NULL, &ViewSize,
@ -2373,15 +2372,15 @@ retry_mapview:;
if (!NT_SUCCESS(status)) { if (!NT_SUCCESS(status)) {
if (status == (NTSTATUS) /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018 && if (status == (NTSTATUS) /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018 &&
map->address && (flags & MDBX_MRESIZE_MAY_MOVE) != 0) { map->base && (flags & MDBX_MRESIZE_MAY_MOVE) != 0) {
/* try remap at another base address */ /* try remap at another base address */
map->address = NULL; map->base = NULL;
goto retry_mapview; goto retry_mapview;
} }
NtClose(map->section); NtClose(map->section);
map->section = NULL; map->section = NULL;
if (map->address && (size != map->current || limit != map->limit)) { if (map->base && (size != map->current || limit != map->limit)) {
/* try remap with previously size and limit, /* try remap with previously size and limit,
* but will return MDBX_UNABLE_EXTEND_MAPSIZE on success */ * but will return MDBX_UNABLE_EXTEND_MAPSIZE on success */
rc = (limit > map->limit) ? MDBX_UNABLE_EXTEND_MAPSIZE : MDBX_EPERM; rc = (limit > map->limit) ? MDBX_UNABLE_EXTEND_MAPSIZE : MDBX_EPERM;
@ -2393,7 +2392,7 @@ retry_mapview:;
/* no way to recovery */ /* no way to recovery */
goto bailout_ntstatus; goto bailout_ntstatus;
} }
assert(map->address != MAP_FAILED); assert(map->base != MAP_FAILED);
map->current = (size_t)SectionSize.QuadPart; map->current = (size_t)SectionSize.QuadPart;
map->limit = ViewSize; map->limit = ViewSize;
@ -2425,7 +2424,7 @@ retry_mapview:;
* - this allows us to clear the mask only within the file size * - this allows us to clear the mask only within the file size
* when closing the mapping. */ * when closing the mapping. */
MDBX_ASAN_UNPOISON_MEMORY_REGION( MDBX_ASAN_UNPOISON_MEMORY_REGION(
(char *)map->address + size, ptr_disp(map->base, size),
((map->current < map->limit) ? map->current : map->limit) - size); ((map->current < map->limit) ? map->current : map->limit) - size);
} }
map->current = size; map->current = size;
@ -2437,7 +2436,7 @@ retry_mapview:;
if (limit < map->limit) { if (limit < map->limit) {
/* unmap an excess at end of mapping. */ /* unmap an excess at end of mapping. */
// coverity[offset_free : FALSE] // coverity[offset_free : FALSE]
if (unlikely(munmap(map->dxb + limit, map->limit - limit))) { if (unlikely(munmap(ptr_disp(map->base, limit), map->limit - limit))) {
assert(errno != 0); assert(errno != 0);
return errno; return errno;
} }
@ -2450,10 +2449,10 @@ retry_mapview:;
return err; return err;
assert(limit > map->limit); assert(limit > map->limit);
uint8_t *ptr = MAP_FAILED; void *ptr = MAP_FAILED;
#if (defined(__linux__) || defined(__gnu_linux__)) && defined(_GNU_SOURCE) #if (defined(__linux__) || defined(__gnu_linux__)) && defined(_GNU_SOURCE)
ptr = mremap(map->address, map->limit, limit, ptr = mremap(map->base, map->limit, limit,
#if defined(MREMAP_MAYMOVE) #if defined(MREMAP_MAYMOVE)
(flags & MDBX_MRESIZE_MAY_MOVE) ? MREMAP_MAYMOVE : (flags & MDBX_MRESIZE_MAY_MOVE) ? MREMAP_MAYMOVE :
#endif /* MREMAP_MAYMOVE */ #endif /* MREMAP_MAYMOVE */
@ -2482,11 +2481,11 @@ retry_mapview:;
if (ptr == MAP_FAILED) { if (ptr == MAP_FAILED) {
/* Try to mmap additional space beyond the end of mapping. */ /* Try to mmap additional space beyond the end of mapping. */
ptr = mmap(map->dxb + map->limit, limit - map->limit, mmap_prot, ptr = mmap(ptr_disp(map->base, map->limit), limit - map->limit, mmap_prot,
mmap_flags | MAP_FIXED_NOREPLACE, map->fd, map->limit); mmap_flags | MAP_FIXED_NOREPLACE, map->fd, map->limit);
if (ptr == map->dxb + map->limit) if (ptr == ptr_disp(map->base, map->limit))
/* successfully attached the mapping at the end */ /* successfully attached the mapping at the end */
ptr = map->dxb; ptr = map->base;
else if (ptr != MAP_FAILED) { else if (ptr != MAP_FAILED) {
/* the desired address is busy, unmap unsuitable one */ /* the desired address is busy, unmap unsuitable one */
if (unlikely(munmap(ptr, limit - map->limit))) { if (unlikely(munmap(ptr, limit - map->limit))) {
@ -2519,13 +2518,13 @@ retry_mapview:;
return MDBX_UNABLE_EXTEND_MAPSIZE; return MDBX_UNABLE_EXTEND_MAPSIZE;
} }
if (unlikely(munmap(map->address, map->limit))) { if (unlikely(munmap(map->base, map->limit))) {
assert(errno != 0); assert(errno != 0);
return errno; return errno;
} }
// coverity[pass_freed_arg : FALSE] // coverity[pass_freed_arg : FALSE]
ptr = mmap(map->address, limit, mmap_prot, ptr = mmap(map->base, limit, mmap_prot,
(flags & MDBX_MRESIZE_MAY_MOVE) (flags & MDBX_MRESIZE_MAY_MOVE)
? mmap_flags ? mmap_flags
: mmap_flags | (MAP_FIXED_NOREPLACE ? MAP_FIXED_NOREPLACE : mmap_flags | (MAP_FIXED_NOREPLACE ? MAP_FIXED_NOREPLACE
@ -2535,13 +2534,13 @@ retry_mapview:;
unlikely(ptr == MAP_FAILED) && !(flags & MDBX_MRESIZE_MAY_MOVE) && unlikely(ptr == MAP_FAILED) && !(flags & MDBX_MRESIZE_MAY_MOVE) &&
errno == /* kernel don't support MAP_FIXED_NOREPLACE */ EINVAL) errno == /* kernel don't support MAP_FIXED_NOREPLACE */ EINVAL)
// coverity[pass_freed_arg : FALSE] // coverity[pass_freed_arg : FALSE]
ptr = mmap(map->address, limit, mmap_prot, mmap_flags | MAP_FIXED, ptr =
map->fd, 0); mmap(map->base, limit, mmap_prot, mmap_flags | MAP_FIXED, map->fd, 0);
if (unlikely(ptr == MAP_FAILED)) { if (unlikely(ptr == MAP_FAILED)) {
/* try to restore prev mapping */ /* try to restore prev mapping */
// coverity[pass_freed_arg : FALSE] // coverity[pass_freed_arg : FALSE]
ptr = mmap(map->address, map->limit, mmap_prot, ptr = mmap(map->base, map->limit, mmap_prot,
(flags & MDBX_MRESIZE_MAY_MOVE) (flags & MDBX_MRESIZE_MAY_MOVE)
? mmap_flags ? mmap_flags
: mmap_flags | (MAP_FIXED_NOREPLACE ? MAP_FIXED_NOREPLACE : mmap_flags | (MAP_FIXED_NOREPLACE ? MAP_FIXED_NOREPLACE
@ -2551,21 +2550,20 @@ retry_mapview:;
unlikely(ptr == MAP_FAILED) && !(flags & MDBX_MRESIZE_MAY_MOVE) && unlikely(ptr == MAP_FAILED) && !(flags & MDBX_MRESIZE_MAY_MOVE) &&
errno == /* kernel don't support MAP_FIXED_NOREPLACE */ EINVAL) errno == /* kernel don't support MAP_FIXED_NOREPLACE */ EINVAL)
// coverity[pass_freed_arg : FALSE] // coverity[pass_freed_arg : FALSE]
ptr = mmap(map->address, map->limit, mmap_prot, mmap_flags | MAP_FIXED, ptr = mmap(map->base, map->limit, mmap_prot, mmap_flags | MAP_FIXED,
map->fd, 0); map->fd, 0);
if (unlikely(ptr == MAP_FAILED)) { if (unlikely(ptr == MAP_FAILED)) {
VALGRIND_MAKE_MEM_NOACCESS(map->address, map->current); VALGRIND_MAKE_MEM_NOACCESS(map->base, map->current);
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic /* Unpoisoning is required for ASAN to avoid false-positive diagnostic
* when this memory will be re-used by malloc or another mmapping. * when this memory will be re-used by malloc or another mmapping.
* See * See
* https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203 * https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
*/ */
MDBX_ASAN_UNPOISON_MEMORY_REGION( MDBX_ASAN_UNPOISON_MEMORY_REGION(
map->address, map->base, (map->current < map->limit) ? map->current : map->limit);
(map->current < map->limit) ? map->current : map->limit);
map->limit = 0; map->limit = 0;
map->current = 0; map->current = 0;
map->address = nullptr; map->base = nullptr;
assert(errno != 0); assert(errno != 0);
return errno; return errno;
} }
@ -2575,38 +2573,38 @@ retry_mapview:;
} }
assert(ptr && ptr != MAP_FAILED); assert(ptr && ptr != MAP_FAILED);
if (map->address != ptr) { if (map->base != ptr) {
VALGRIND_MAKE_MEM_NOACCESS(map->address, map->current); VALGRIND_MAKE_MEM_NOACCESS(map->base, map->current);
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic /* Unpoisoning is required for ASAN to avoid false-positive diagnostic
* when this memory will be re-used by malloc or another mmapping. * when this memory will be re-used by malloc or another mmapping.
* See * See
* https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203 * https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
*/ */
MDBX_ASAN_UNPOISON_MEMORY_REGION( MDBX_ASAN_UNPOISON_MEMORY_REGION(
map->address, (map->current < map->limit) ? map->current : map->limit); map->base, (map->current < map->limit) ? map->current : map->limit);
VALGRIND_MAKE_MEM_DEFINED(ptr, map->current); VALGRIND_MAKE_MEM_DEFINED(ptr, map->current);
MDBX_ASAN_UNPOISON_MEMORY_REGION(ptr, map->current); MDBX_ASAN_UNPOISON_MEMORY_REGION(ptr, map->current);
map->address = ptr; map->base = ptr;
} }
map->limit = limit; map->limit = limit;
#if MDBX_ENABLE_MADVISE #if MDBX_ENABLE_MADVISE
#ifdef MADV_DONTFORK #ifdef MADV_DONTFORK
if (unlikely(madvise(map->address, map->limit, MADV_DONTFORK) != 0)) { if (unlikely(madvise(map->base, map->limit, MADV_DONTFORK) != 0)) {
assert(errno != 0); assert(errno != 0);
return errno; return errno;
} }
#endif /* MADV_DONTFORK */ #endif /* MADV_DONTFORK */
#ifdef MADV_NOHUGEPAGE #ifdef MADV_NOHUGEPAGE
(void)madvise(map->address, map->limit, MADV_NOHUGEPAGE); (void)madvise(map->base, map->limit, MADV_NOHUGEPAGE);
#endif /* MADV_NOHUGEPAGE */ #endif /* MADV_NOHUGEPAGE */
#endif /* MDBX_ENABLE_MADVISE */ #endif /* MDBX_ENABLE_MADVISE */
#endif /* POSIX / Windows */ #endif /* POSIX / Windows */
assert(rc != MDBX_SUCCESS || assert(rc != MDBX_SUCCESS ||
(map->address != nullptr && map->address != MAP_FAILED && (map->base != nullptr && map->base != MAP_FAILED &&
map->current == size && map->limit == limit)); map->current == size && map->limit == limit));
return rc; return rc;
} }

View File

@ -228,10 +228,9 @@ typedef wchar_t pathchar_t;
typedef char pathchar_t; typedef char pathchar_t;
#endif #endif
typedef struct osal_mmap_param { typedef struct osal_mmap {
union { union {
void *address; void *base;
uint8_t *dxb;
struct MDBX_lockinfo *lck; struct MDBX_lockinfo *lck;
}; };
mdbx_filehandle_t fd; mdbx_filehandle_t fd;
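
With the dedicated uint8_t *dxb alias removed from the union, byte addressing into the mapping goes through ptr_disp() on the single void *base member. A trimmed-down sketch of that shape (demo_mmap and page_at are illustrative names, not the real declarations):

#include <stddef.h>
#include <stdint.h>

#define ptr_disp(ptr, disp) ((void *)(((intptr_t)(ptr)) + ((intptr_t)(disp))))

typedef struct demo_mmap {
  void *base;     /* start of the mapped region */
  size_t current; /* currently usable bytes */
  size_t limit;   /* reserved bytes of address space */
} demo_mmap_t;

/* Address of page `pgno` inside the mapping, assuming `psize`-byte pages. */
static void *page_at(const demo_mmap_t *map, size_t pgno, size_t psize) {
  return ptr_disp(map->base, pgno * psize);
}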