mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-04 16:54:13 +08:00
mdbx: log2shift instead of multiplication by pagesize.
Change-Id: I72c4c7699a409795459b466ff74c388b15b13687
This commit is contained in:
parent
9ef81ac16c
commit
802c10f13f
2
TODO.md
2
TODO.md
@ -26,7 +26,7 @@
|
|||||||
- [x] Поменять сигнатуры.
|
- [x] Поменять сигнатуры.
|
||||||
- [x] Добавить мета-страницы в coredump, проверить lck
|
- [x] Добавить мета-страницы в coredump, проверить lck
|
||||||
- [x] Сделать список для txnid_t, когда sizeof(txnid_t) > sizeof(pgno_t) и вернуть размер pgno_t
|
- [x] Сделать список для txnid_t, когда sizeof(txnid_t) > sizeof(pgno_t) и вернуть размер pgno_t
|
||||||
- [ ] Избавиться от умножения на размер страницы (заменить на сдвиг).
|
- [x] Избавиться от умножения на размер страницы (заменить на сдвиг).
|
||||||
- [ ] Устранение всех предупреждений (в том числе под Windows).
|
- [ ] Устранение всех предупреждений (в том числе под Windows).
|
||||||
- [ ] Перевод mdbx-tools на С++ и сборка для Windows
|
- [ ] Перевод mdbx-tools на С++ и сборка для Windows
|
||||||
- [ ] Заменить заглушки mdbx_version и mdbx_build
|
- [ ] Заменить заглушки mdbx_version и mdbx_build
|
||||||
|
17
src/bits.h
17
src/bits.h
@ -626,6 +626,7 @@ struct MDBX_env {
|
|||||||
#define MDBX_ENV_TXKEY 0x10000000U
|
#define MDBX_ENV_TXKEY 0x10000000U
|
||||||
uint32_t me_flags; /* see mdbx_env */
|
uint32_t me_flags; /* see mdbx_env */
|
||||||
unsigned me_psize; /* DB page size, inited from me_os_psize */
|
unsigned me_psize; /* DB page size, inited from me_os_psize */
|
||||||
|
unsigned me_psize2log; /* log2 of DB page size */
|
||||||
unsigned me_os_psize; /* OS page size, from mdbx_syspagesize() */
|
unsigned me_os_psize; /* OS page size, from mdbx_syspagesize() */
|
||||||
unsigned me_maxreaders; /* size of the reader table */
|
unsigned me_maxreaders; /* size of the reader table */
|
||||||
/* Max MDBX_lockinfo.mti_numreaders of interest to mdbx_env_close() */
|
/* Max MDBX_lockinfo.mti_numreaders of interest to mdbx_env_close() */
|
||||||
@ -920,7 +921,7 @@ static __inline size_t roundup2(size_t value, size_t granularity) {
|
|||||||
#define IS_SUBP(p) F_ISSET((p)->mp_flags, P_SUBP)
|
#define IS_SUBP(p) F_ISSET((p)->mp_flags, P_SUBP)
|
||||||
|
|
||||||
/* The number of overflow pages needed to store the given size. */
|
/* The number of overflow pages needed to store the given size. */
|
||||||
#define OVPAGES(size, psize) ((PAGEHDRSZ - 1 + (size)) / (psize) + 1)
|
/* `size` is a byte count; the page size is taken from `env` by bytes2pgno(),
 * which shifts right by env->me_psize2log instead of dividing. */
#define OVPAGES(env, size) (bytes2pgno(env, PAGEHDRSZ - 1 + (size)) + 1)
|
||||||
|
|
||||||
/* Link in MDBX_txn.mt_loose_pages list.
|
/* Link in MDBX_txn.mt_loose_pages list.
|
||||||
* Kept outside the page header, which is needed when reusing the page. */
|
* Kept outside the page header, which is needed when reusing the page. */
|
||||||
@ -1112,3 +1113,17 @@ static __inline void SETDSZ(MDBX_node *node, unsigned size) {
|
|||||||
#else
|
#else
|
||||||
#define mdbx_cmp2int(a, b) (((a) > (b)) - ((b) > (a)))
|
#define mdbx_cmp2int(a, b) (((a) > (b)) - ((b) > (a)))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Converts a page number (or a count of pages) into a byte offset/length.
 *
 * The value is widened to size_t and shifted left by env->me_psize2log,
 * which replaces a multiplication by env->me_psize.
 *
 * env   Environment supplying me_psize and me_psize2log.
 * pgno  Page number or number of pages.
 *
 * Returns pgno * page-size, computed as a shift. */
static __inline size_t pgno2bytes(const MDBX_env *env, pgno_t pgno) {
|
||||||
|
/* Sanity check: me_psize2log must be the exact log2 of me_psize. */
mdbx_assert(env, (1u << env->me_psize2log) == env->me_psize);
|
||||||
|
return ((size_t)pgno) << env->me_psize2log;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Returns the address of page number `pgno` relative to env->me_map:
 * me_map + pgno2bytes(env, pgno), cast to MDBX_page*.
 * No range check on `pgno` is performed here. */
static __inline MDBX_page *pgno2page(const MDBX_env *env, pgno_t pgno) {
|
||||||
|
return (MDBX_page *)(env->me_map + pgno2bytes(env, pgno));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Converts a byte count into a number of whole pages.
 *
 * `bytes` is shifted right by env->me_psize2log, which replaces a division
 * by env->me_psize; any remainder smaller than one page is discarded and
 * the result is cast to pgno_t.
 *
 * env    Environment supplying me_psize and me_psize2log.
 * bytes  Size or offset in bytes.
 *
 * Returns bytes / page-size, rounded down. */
static __inline pgno_t bytes2pgno(const MDBX_env *env, size_t bytes) {
|
||||||
|
/* Sanity check: shifting me_psize by its own log2 must yield exactly 1. */
mdbx_assert(env, (env->me_psize >> env->me_psize2log) == 1);
|
||||||
|
return (pgno_t)(bytes >> env->me_psize2log);
|
||||||
|
}
|
||||||
|
140
src/mdbx.c
140
src/mdbx.c
@ -553,8 +553,9 @@ int mdbx_runtime_flags = MDBX_DBG_PRINT
|
|||||||
|
|
||||||
MDBX_debug_func *mdbx_debug_logger;
|
MDBX_debug_func *mdbx_debug_logger;
|
||||||
|
|
||||||
static int mdbx_page_alloc(MDBX_cursor *mc, int num, MDBX_page **mp, int flags);
|
static int mdbx_page_alloc(MDBX_cursor *mc, unsigned num, MDBX_page **mp,
|
||||||
static int mdbx_page_new(MDBX_cursor *mc, uint32_t flags, int num,
|
int flags);
|
||||||
|
static int mdbx_page_new(MDBX_cursor *mc, uint32_t flags, unsigned num,
|
||||||
MDBX_page **mp);
|
MDBX_page **mp);
|
||||||
static int mdbx_page_touch(MDBX_cursor *mc);
|
static int mdbx_page_touch(MDBX_cursor *mc);
|
||||||
static int mdbx_cursor_touch(MDBX_cursor *mc);
|
static int mdbx_cursor_touch(MDBX_cursor *mc);
|
||||||
@ -983,15 +984,15 @@ int mdbx_dcmp(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a,
|
|||||||
* Set MDBX_TXN_ERROR on failure. */
|
* Set MDBX_TXN_ERROR on failure. */
|
||||||
static MDBX_page *mdbx_page_malloc(MDBX_txn *txn, unsigned num) {
|
static MDBX_page *mdbx_page_malloc(MDBX_txn *txn, unsigned num) {
|
||||||
MDBX_env *env = txn->mt_env;
|
MDBX_env *env = txn->mt_env;
|
||||||
size_t size = env->me_psize;
|
|
||||||
MDBX_page *np = env->me_dpages;
|
MDBX_page *np = env->me_dpages;
|
||||||
|
size_t size = env->me_psize;
|
||||||
if (likely(num == 1 && np)) {
|
if (likely(num == 1 && np)) {
|
||||||
ASAN_UNPOISON_MEMORY_REGION(np, size);
|
ASAN_UNPOISON_MEMORY_REGION(np, size);
|
||||||
VALGRIND_MEMPOOL_ALLOC(env, np, size);
|
VALGRIND_MEMPOOL_ALLOC(env, np, size);
|
||||||
VALGRIND_MAKE_MEM_DEFINED(&np->mp_next, sizeof(np->mp_next));
|
VALGRIND_MAKE_MEM_DEFINED(&np->mp_next, sizeof(np->mp_next));
|
||||||
env->me_dpages = np->mp_next;
|
env->me_dpages = np->mp_next;
|
||||||
} else {
|
} else {
|
||||||
size *= num;
|
size = pgno2bytes(env, num);
|
||||||
np = malloc(size);
|
np = malloc(size);
|
||||||
if (unlikely(!np)) {
|
if (unlikely(!np)) {
|
||||||
txn->mt_flags |= MDBX_TXN_ERROR;
|
txn->mt_flags |= MDBX_TXN_ERROR;
|
||||||
@ -1006,7 +1007,7 @@ static MDBX_page *mdbx_page_malloc(MDBX_txn *txn, unsigned num) {
|
|||||||
* many pages they will be filling in at least up to the last page. */
|
* many pages they will be filling in at least up to the last page. */
|
||||||
size_t skip = PAGEHDRSZ;
|
size_t skip = PAGEHDRSZ;
|
||||||
if (num > 1)
|
if (num > 1)
|
||||||
skip += (num - 1) * env->me_psize;
|
skip += pgno2bytes(env, num - 1);
|
||||||
memset((char *)np + skip, 0, size - skip);
|
memset((char *)np + skip, 0, size - skip);
|
||||||
}
|
}
|
||||||
VALGRIND_MAKE_MEM_UNDEFINED(np, size);
|
VALGRIND_MAKE_MEM_UNDEFINED(np, size);
|
||||||
@ -1048,7 +1049,7 @@ static void mdbx_dlist_free(MDBX_txn *txn) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void __cold mdbx_kill_page(MDBX_env *env, pgno_t pgno) {
|
static void __cold mdbx_kill_page(MDBX_env *env, pgno_t pgno) {
|
||||||
const size_t offs = env->me_psize * pgno;
|
const size_t offs = pgno2bytes(env, pgno);
|
||||||
const size_t shift = offsetof(MDBX_page, mp_pages);
|
const size_t shift = offsetof(MDBX_page, mp_pages);
|
||||||
|
|
||||||
if (env->me_flags & MDBX_WRITEMAP) {
|
if (env->me_flags & MDBX_WRITEMAP) {
|
||||||
@ -1241,7 +1242,7 @@ static int mdbx_page_spill(MDBX_cursor *m0, MDBX_val *key, MDBX_val *data) {
|
|||||||
i += txn->mt_dbs[MAIN_DBI].md_depth;
|
i += txn->mt_dbs[MAIN_DBI].md_depth;
|
||||||
/* For puts, roughly factor in the key+data size */
|
/* For puts, roughly factor in the key+data size */
|
||||||
if (key)
|
if (key)
|
||||||
i += (LEAFSIZE(key, data) + txn->mt_env->me_psize) / txn->mt_env->me_psize;
|
i += bytes2pgno(txn->mt_env, LEAFSIZE(key, data) + txn->mt_env->me_psize);
|
||||||
i += i; /* double it for good measure */
|
i += i; /* double it for good measure */
|
||||||
size_t need = i;
|
size_t need = i;
|
||||||
|
|
||||||
@ -1322,8 +1323,7 @@ bailout:
|
|||||||
|
|
||||||
/*----------------------------------------------------------------------------*/
|
/*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
#define METAPAGE(env, n) \
|
/* Address of the mp_meta member of page number `n`, located via pgno2page(). */
#define METAPAGE(env, n) (&pgno2page(env, n)->mp_meta)
|
||||||
(&((MDBX_page *)((env)->me_map + env->me_psize * (n)))->mp_meta)
|
|
||||||
|
|
||||||
#define METAPAGE_END(env) METAPAGE(env, NUM_METAS)
|
#define METAPAGE_END(env) METAPAGE(env, NUM_METAS)
|
||||||
|
|
||||||
@ -1545,7 +1545,7 @@ static void mdbx_page_dirty(MDBX_txn *txn, MDBX_page *mp) {
|
|||||||
#define MDBX_ALLOC_ALL \
|
#define MDBX_ALLOC_ALL \
|
||||||
(MDBX_ALLOC_CACHE | MDBX_ALLOC_GC | MDBX_ALLOC_NEW | MDBX_ALLOC_KICK)
|
(MDBX_ALLOC_CACHE | MDBX_ALLOC_GC | MDBX_ALLOC_NEW | MDBX_ALLOC_KICK)
|
||||||
|
|
||||||
static int mdbx_page_alloc(MDBX_cursor *mc, int num, MDBX_page **mp,
|
static int mdbx_page_alloc(MDBX_cursor *mc, unsigned num, MDBX_page **mp,
|
||||||
int flags) {
|
int flags) {
|
||||||
int rc;
|
int rc;
|
||||||
MDBX_txn *txn = mc->mc_txn;
|
MDBX_txn *txn = mc->mc_txn;
|
||||||
@ -1829,10 +1829,10 @@ static int mdbx_page_alloc(MDBX_cursor *mc, int num, MDBX_page **mp,
|
|||||||
done:
|
done:
|
||||||
assert(mp && num);
|
assert(mp && num);
|
||||||
if (env->me_flags & MDBX_WRITEMAP) {
|
if (env->me_flags & MDBX_WRITEMAP) {
|
||||||
np = (MDBX_page *)(env->me_map + env->me_psize * pgno);
|
np = pgno2page(env, pgno);
|
||||||
/* LY: reset no-access flag from mdbx_kill_page() */
|
/* LY: reset no-access flag from mdbx_kill_page() */
|
||||||
VALGRIND_MAKE_MEM_UNDEFINED(np, env->me_psize * num);
|
VALGRIND_MAKE_MEM_UNDEFINED(np, pgno2bytes(env, num));
|
||||||
ASAN_UNPOISON_MEMORY_REGION(np, env->me_psize * num);
|
ASAN_UNPOISON_MEMORY_REGION(np, pgno2bytes(env, num));
|
||||||
} else {
|
} else {
|
||||||
if (unlikely(!(np = mdbx_page_malloc(txn, num)))) {
|
if (unlikely(!(np = mdbx_page_malloc(txn, num)))) {
|
||||||
rc = MDBX_ENOMEM;
|
rc = MDBX_ENOMEM;
|
||||||
@ -1849,8 +1849,8 @@ done:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (env->me_flags & MDBX_PAGEPERTURB)
|
if (env->me_flags & MDBX_PAGEPERTURB)
|
||||||
memset(np, 0x71 /* 'q', 113 */, env->me_psize * num);
|
memset(np, 0x71 /* 'q', 113 */, pgno2bytes(env, num));
|
||||||
VALGRIND_MAKE_MEM_UNDEFINED(np, env->me_psize * num);
|
VALGRIND_MAKE_MEM_UNDEFINED(np, pgno2bytes(env, num));
|
||||||
|
|
||||||
np->mp_pgno = pgno;
|
np->mp_pgno = pgno;
|
||||||
np->mp_leaf2_ksize = 0;
|
np->mp_leaf2_ksize = 0;
|
||||||
@ -1908,18 +1908,15 @@ static int mdbx_page_unspill(MDBX_txn *txn, MDBX_page *mp, MDBX_page **ret) {
|
|||||||
int num;
|
int num;
|
||||||
if (txn->mt_dirtyroom == 0)
|
if (txn->mt_dirtyroom == 0)
|
||||||
return MDBX_TXN_FULL;
|
return MDBX_TXN_FULL;
|
||||||
if (IS_OVERFLOW(mp))
|
num = IS_OVERFLOW(mp) ? mp->mp_pages : 1;
|
||||||
num = mp->mp_pages;
|
|
||||||
else
|
|
||||||
num = 1;
|
|
||||||
if (env->me_flags & MDBX_WRITEMAP) {
|
if (env->me_flags & MDBX_WRITEMAP) {
|
||||||
np = mp;
|
np = mp;
|
||||||
} else {
|
} else {
|
||||||
np = mdbx_page_malloc(txn, num);
|
np = mdbx_page_malloc(txn, num);
|
||||||
if (unlikely(!np))
|
if (unlikely(!np))
|
||||||
return MDBX_ENOMEM;
|
return MDBX_ENOMEM;
|
||||||
if (num > 1)
|
if (unlikely(num > 1))
|
||||||
memcpy(np, mp, num * env->me_psize);
|
memcpy(np, mp, pgno2bytes(env, num));
|
||||||
else
|
else
|
||||||
mdbx_page_copy(np, mp, env->me_psize);
|
mdbx_page_copy(np, mp, env->me_psize);
|
||||||
}
|
}
|
||||||
@ -1932,8 +1929,7 @@ static int mdbx_page_unspill(MDBX_txn *txn, MDBX_page *mp, MDBX_page **ret) {
|
|||||||
else
|
else
|
||||||
txn->mt_spill_pages[x] |= 1;
|
txn->mt_spill_pages[x] |= 1;
|
||||||
} /* otherwise, if belonging to a parent txn, the
|
} /* otherwise, if belonging to a parent txn, the
|
||||||
* page remains spilled until child commits
|
* page remains spilled until child commits */
|
||||||
*/
|
|
||||||
|
|
||||||
mdbx_page_dirty(txn, np);
|
mdbx_page_dirty(txn, np);
|
||||||
np->mp_flags |= P_DIRTY;
|
np->mp_flags |= P_DIRTY;
|
||||||
@ -2077,10 +2073,11 @@ int mdbx_env_sync(MDBX_env *env, int force) {
|
|||||||
env->me_sync_pending >= env->me_sync_threshold))
|
env->me_sync_pending >= env->me_sync_threshold))
|
||||||
flags &= MDBX_WRITEMAP /* clear flags for full steady sync */;
|
flags &= MDBX_WRITEMAP /* clear flags for full steady sync */;
|
||||||
|
|
||||||
if (env->me_sync_pending > env->me_psize * 16 &&
|
if (env->me_sync_pending >
|
||||||
|
pgno2bytes(env, 16 /* FIXME: define threshold */) &&
|
||||||
(flags & MDBX_NOSYNC) == 0) {
|
(flags & MDBX_NOSYNC) == 0) {
|
||||||
assert(((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0);
|
assert(((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0);
|
||||||
size_t used_size = env->me_psize * (head->mm_last_pg + 1);
|
size_t used_size = pgno2bytes(env, head->mm_last_pg + 1);
|
||||||
mdbx_txn_unlock(env);
|
mdbx_txn_unlock(env);
|
||||||
|
|
||||||
/* LY: pre-sync without holding lock to reduce latency for writer(s) */
|
/* LY: pre-sync without holding lock to reduce latency for writer(s) */
|
||||||
@ -3036,7 +3033,7 @@ bailout:
|
|||||||
static int mdbx_page_flush(MDBX_txn *txn, size_t keep) {
|
static int mdbx_page_flush(MDBX_txn *txn, size_t keep) {
|
||||||
MDBX_env *env = txn->mt_env;
|
MDBX_env *env = txn->mt_env;
|
||||||
MDBX_ID2L dl = txn->mt_rw_dirtylist;
|
MDBX_ID2L dl = txn->mt_rw_dirtylist;
|
||||||
unsigned psize = env->me_psize, j;
|
unsigned j;
|
||||||
int i, pagecount = dl[0].mid, rc;
|
int i, pagecount = dl[0].mid, rc;
|
||||||
size_t size = 0, pos = 0;
|
size_t size = 0, pos = 0;
|
||||||
pgno_t pgno = 0;
|
pgno_t pgno = 0;
|
||||||
@ -3059,7 +3056,8 @@ static int mdbx_page_flush(MDBX_txn *txn, size_t keep) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
dp->mp_flags &= ~P_DIRTY;
|
dp->mp_flags &= ~P_DIRTY;
|
||||||
env->me_sync_pending += IS_OVERFLOW(dp) ? psize * dp->mp_pages : psize;
|
env->me_sync_pending +=
|
||||||
|
IS_OVERFLOW(dp) ? pgno2bytes(env, dp->mp_pages) : env->me_psize;
|
||||||
}
|
}
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
@ -3077,10 +3075,8 @@ static int mdbx_page_flush(MDBX_txn *txn, size_t keep) {
|
|||||||
pgno = dl[i].mid;
|
pgno = dl[i].mid;
|
||||||
/* clear dirty flag */
|
/* clear dirty flag */
|
||||||
dp->mp_flags &= ~P_DIRTY;
|
dp->mp_flags &= ~P_DIRTY;
|
||||||
pos = pgno * psize;
|
pos = pgno2bytes(env, pgno);
|
||||||
size = psize;
|
size = IS_OVERFLOW(dp) ? pgno2bytes(env, dp->mp_pages) : env->me_psize;
|
||||||
if (IS_OVERFLOW(dp))
|
|
||||||
size *= dp->mp_pages;
|
|
||||||
env->me_sync_pending += size;
|
env->me_sync_pending += size;
|
||||||
}
|
}
|
||||||
/* Write up to MDBX_COMMIT_PAGES dirty pages at a time. */
|
/* Write up to MDBX_COMMIT_PAGES dirty pages at a time. */
|
||||||
@ -3107,7 +3103,7 @@ static int mdbx_page_flush(MDBX_txn *txn, size_t keep) {
|
|||||||
n++;
|
n++;
|
||||||
}
|
}
|
||||||
|
|
||||||
mdbx_invalidate_cache(env->me_map, txn->mt_next_pgno * env->me_psize);
|
mdbx_invalidate_cache(env->me_map, pgno2bytes(env, txn->mt_next_pgno));
|
||||||
|
|
||||||
for (i = keep; ++i <= pagecount;) {
|
for (i = keep; ++i <= pagecount;) {
|
||||||
dp = dl[i].mptr;
|
dp = dl[i].mptr;
|
||||||
@ -3597,7 +3593,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
|
|||||||
MDBX_meta *const head = mdbx_meta_head(env);
|
MDBX_meta *const head = mdbx_meta_head(env);
|
||||||
|
|
||||||
const size_t prev_mapsize = head->mm_mapsize;
|
const size_t prev_mapsize = head->mm_mapsize;
|
||||||
const size_t used_size = env->me_psize * (pending->mm_last_pg + 1);
|
const size_t used_size = pgno2bytes(env, pending->mm_last_pg + 1);
|
||||||
|
|
||||||
mdbx_assert(env, mdbx_meta_eq_mask(env) == 0);
|
mdbx_assert(env, mdbx_meta_eq_mask(env) == 0);
|
||||||
mdbx_assert(env,
|
mdbx_assert(env,
|
||||||
@ -3838,13 +3834,25 @@ int mdbx_get_maxkeysize(size_t pagesize) {
|
|||||||
return (maxkey > 0 && maxkey < INT_MAX) ? (int)maxkey : -MDBX_EINVAL;
|
return (maxkey > 0 && maxkey < INT_MAX) ? (int)maxkey : -MDBX_EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __cold mdbx_env_setup_limits(MDBX_env *env, size_t pagesize) {
|
/* Applies `pagesize` as the DB page size of `env` and recomputes the values
 * derived from it: me_psize, me_maxfree_1pg, me_nodemax, me_maxkey_limit,
 * me_psize2log and me_maxpg.
 *
 * `pagesize` must be a power of two within [MIN_PAGESIZE, MAX_PAGESIZE];
 * all three conditions are checked with mdbx_ensure().
 *
 * NOTE(review): mdbx_env_create() passes env->me_os_psize here without
 * clamping, whereas mdbx_setup_dxb() clamps me_psize to MAX_PAGESIZE first.
 * Confirm that an OS page size above MAX_PAGESIZE cannot reach the
 * mdbx_ensure() checks below. */
static void __cold mdbx_setup_pagesize(MDBX_env *env, size_t pagesize) {
|
||||||
env->me_maxfree_1pg = (pagesize - PAGEHDRSZ) / sizeof(pgno_t) - 1;
|
mdbx_ensure(env, is_power2(pagesize));
|
||||||
env->me_maxpg = env->me_mapsize / pagesize;
|
mdbx_ensure(env, pagesize >= MIN_PAGESIZE);
|
||||||
|
mdbx_ensure(env, pagesize <= MAX_PAGESIZE);
|
||||||
|
|
||||||
|
env->me_psize = pagesize;
|
||||||
|
env->me_maxfree_1pg = (pagesize - PAGEHDRSZ) / sizeof(pgno_t) - 1;
|
||||||
env->me_nodemax = mdbx_calc_nodemax(pagesize);
|
env->me_nodemax = mdbx_calc_nodemax(pagesize);
|
||||||
env->me_maxkey_limit = mdbx_calc_maxkey(env->me_nodemax);
|
env->me_maxkey_limit = mdbx_calc_maxkey(env->me_nodemax);
|
||||||
assert(env->me_maxkey_limit > 42 && env->me_maxkey_limit < pagesize);
|
mdbx_assert(env,
|
||||||
|
env->me_maxkey_limit > 42 && env->me_maxkey_limit < pagesize);
|
||||||
|
|
||||||
|
/* Derive log2(pagesize). The loop consumes the local `pagesize`, so every
 * use of its original value is placed above this point. */
env->me_psize2log = 0;
|
||||||
|
while (pagesize > 1) {
|
||||||
|
env->me_psize2log += 1;
|
||||||
|
pagesize >>= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Must follow the loop: bytes2pgno() shifts by me_psize2log. */
env->me_maxpg = bytes2pgno(env, env->me_mapsize);
|
||||||
}
|
}
|
||||||
|
|
||||||
int __cold mdbx_env_create(MDBX_env **penv) {
|
int __cold mdbx_env_create(MDBX_env **penv) {
|
||||||
@ -3865,7 +3873,7 @@ int __cold mdbx_env_create(MDBX_env **penv) {
|
|||||||
rc = MDBX_INCOMPATIBLE;
|
rc = MDBX_INCOMPATIBLE;
|
||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
mdbx_env_setup_limits(env, env->me_os_psize);
|
mdbx_setup_pagesize(env, env->me_os_psize);
|
||||||
|
|
||||||
rc = mdbx_fastmutex_init(&env->me_dbi_lock);
|
rc = mdbx_fastmutex_init(&env->me_dbi_lock);
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
@ -3910,8 +3918,8 @@ static int __cold mdbx_env_map(MDBX_env *env, void *addr, size_t usedsize) {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(MADV_DODUMP) && defined(MADV_DONTDUMP)
|
#if defined(MADV_DODUMP) && defined(MADV_DONTDUMP)
|
||||||
const size_t meta_length = env->me_psize * NUM_METAS;
|
const size_t meta_length = pgno2bytes(env, NUM_METAS);
|
||||||
(void)madvise(env->me_map, env->me_psize * NUM_METAS, MADV_DODUMP);
|
(void)madvise(env->me_map, meta_length, MADV_DODUMP);
|
||||||
if (!(flags & MDBX_PAGEPERTURB))
|
if (!(flags & MDBX_PAGEPERTURB))
|
||||||
(void)madvise(env->me_map + meta_length, env->me_mapsize - meta_length,
|
(void)madvise(env->me_map + meta_length, env->me_mapsize - meta_length,
|
||||||
MADV_DONTDUMP);
|
MADV_DONTDUMP);
|
||||||
@ -3935,7 +3943,7 @@ static int __cold mdbx_env_map(MDBX_env *env, void *addr, size_t usedsize) {
|
|||||||
/* Lock meta pages to avoid unexpected write,
|
/* Lock meta pages to avoid unexpected write,
|
||||||
* before the data pages would be synchronized. */
|
* before the data pages would be synchronized. */
|
||||||
if (flags & MDBX_WRITEMAP) {
|
if (flags & MDBX_WRITEMAP) {
|
||||||
rc = mdbx_mlock(env->me_map, env->me_psize * NUM_METAS);
|
rc = mdbx_mlock(env->me_map, pgno2bytes(env, NUM_METAS));
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
@ -3955,7 +3963,7 @@ int __cold mdbx_env_set_mapsize(MDBX_env *env, size_t size) {
|
|||||||
if (unlikely(env->me_signature != MDBX_ME_SIGNATURE))
|
if (unlikely(env->me_signature != MDBX_ME_SIGNATURE))
|
||||||
return MDBX_EBADSIGN;
|
return MDBX_EBADSIGN;
|
||||||
|
|
||||||
if (unlikely(size < env->me_psize * 8))
|
if (unlikely(size < pgno2bytes(env, MIN_PAGENO)))
|
||||||
return MDBX_EINVAL;
|
return MDBX_EINVAL;
|
||||||
|
|
||||||
/* If env is already open, caller is responsible for making
|
/* If env is already open, caller is responsible for making
|
||||||
@ -3971,7 +3979,7 @@ int __cold mdbx_env_set_mapsize(MDBX_env *env, size_t size) {
|
|||||||
if (!size)
|
if (!size)
|
||||||
size = meta->mm_mapsize;
|
size = meta->mm_mapsize;
|
||||||
/* Silently round up to minimum if the size is too small */
|
/* Silently round up to minimum if the size is too small */
|
||||||
const size_t usedsize = (meta->mm_last_pg + 1) * env->me_psize;
|
const size_t usedsize = pgno2bytes(env, meta->mm_last_pg + 1);
|
||||||
if (size < usedsize)
|
if (size < usedsize)
|
||||||
size = usedsize;
|
size = usedsize;
|
||||||
|
|
||||||
@ -3992,8 +4000,7 @@ int __cold mdbx_env_set_mapsize(MDBX_env *env, size_t size) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
env->me_mapsize = size;
|
env->me_mapsize = size;
|
||||||
if (env->me_psize)
|
env->me_maxpg = bytes2pgno(env, env->me_mapsize);
|
||||||
env->me_maxpg = env->me_mapsize / env->me_psize;
|
|
||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4053,9 +4060,9 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) {
|
|||||||
env->me_psize = env->me_os_psize;
|
env->me_psize = env->me_os_psize;
|
||||||
if (env->me_psize > MAX_PAGESIZE)
|
if (env->me_psize > MAX_PAGESIZE)
|
||||||
env->me_psize = MAX_PAGESIZE;
|
env->me_psize = MAX_PAGESIZE;
|
||||||
|
mdbx_ensure(env, is_power2(env->me_psize));
|
||||||
|
mdbx_ensure(env, env->me_psize >= MIN_PAGESIZE);
|
||||||
|
|
||||||
assert(is_power2(env->me_psize));
|
|
||||||
assert(env->me_psize >= MIN_PAGESIZE);
|
|
||||||
env->me_mapsize = roundup2(
|
env->me_mapsize = roundup2(
|
||||||
env->me_mapsize ? env->me_mapsize : DEFAULT_MAPSIZE, env->me_os_psize);
|
env->me_mapsize ? env->me_mapsize : DEFAULT_MAPSIZE, env->me_os_psize);
|
||||||
|
|
||||||
@ -4080,7 +4087,8 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) {
|
|||||||
return err;
|
return err;
|
||||||
} else {
|
} else {
|
||||||
env->me_psize = meta.mm_psize;
|
env->me_psize = meta.mm_psize;
|
||||||
if (!is_power2(env->me_psize) || env->me_psize < MIN_PAGESIZE) {
|
if (!is_power2(env->me_psize) || env->me_psize < MIN_PAGESIZE ||
|
||||||
|
env->me_psize > MAX_PAGESIZE) {
|
||||||
mdbx_error("wrong pagesize %u (system %u)", env->me_psize,
|
mdbx_error("wrong pagesize %u (system %u)", env->me_psize,
|
||||||
env->me_os_psize);
|
env->me_os_psize);
|
||||||
return MDBX_WANNA_RECOVERY;
|
return MDBX_WANNA_RECOVERY;
|
||||||
@ -4088,8 +4096,8 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) {
|
|||||||
|
|
||||||
/* Make sure mapsize >= committed data size. Even when using
|
/* Make sure mapsize >= committed data size. Even when using
|
||||||
* mm_mapsize, which could be broken in old files (ITS#7789). */
|
* mm_mapsize, which could be broken in old files (ITS#7789). */
|
||||||
const size_t usedsize =
|
const size_t usedsize = roundup2(
|
||||||
roundup2((meta.mm_last_pg + 1) * env->me_psize, env->me_os_psize);
|
(meta.mm_last_pg + 1) * (size_t)env->me_psize, env->me_os_psize);
|
||||||
if (meta.mm_mapsize < usedsize)
|
if (meta.mm_mapsize < usedsize)
|
||||||
meta.mm_mapsize = usedsize;
|
meta.mm_mapsize = usedsize;
|
||||||
|
|
||||||
@ -4101,6 +4109,8 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) {
|
|||||||
env->me_mapsize = usedsize;
|
env->me_mapsize = usedsize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mdbx_setup_pagesize(env, env->me_psize);
|
||||||
|
|
||||||
uint64_t size;
|
uint64_t size;
|
||||||
err = mdbx_filesize(env->me_fd, &size);
|
err = mdbx_filesize(env->me_fd, &size);
|
||||||
if (unlikely(err != MDBX_SUCCESS))
|
if (unlikely(err != MDBX_SUCCESS))
|
||||||
@ -4184,7 +4194,6 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, int lck_rc) {
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
mdbx_env_setup_limits(env, env->me_psize);
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4894,7 +4903,7 @@ static int mdbx_page_get(MDBX_cursor *mc, pgno_t pgno, MDBX_page **ret,
|
|||||||
level = 0;
|
level = 0;
|
||||||
|
|
||||||
mapped:
|
mapped:
|
||||||
p = (MDBX_page *)(env->me_map + env->me_psize * pgno);
|
p = pgno2page(env, pgno);
|
||||||
|
|
||||||
done:
|
done:
|
||||||
*ret = p;
|
*ret = p;
|
||||||
@ -6294,7 +6303,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
|
|||||||
if (F_ISSET(leaf->mn_flags, F_BIGDATA)) {
|
if (F_ISSET(leaf->mn_flags, F_BIGDATA)) {
|
||||||
MDBX_page *omp;
|
MDBX_page *omp;
|
||||||
pgno_t pg;
|
pgno_t pg;
|
||||||
int level, ovpages, dpages = OVPAGES(data->iov_len, env->me_psize);
|
int level, ovpages, dpages = OVPAGES(env, data->iov_len);
|
||||||
|
|
||||||
memcpy(&pg, olddata.iov_base, sizeof(pg));
|
memcpy(&pg, olddata.iov_base, sizeof(pg));
|
||||||
if (unlikely((rc2 = mdbx_page_get(mc, pg, &omp, &level)) != 0))
|
if (unlikely((rc2 = mdbx_page_get(mc, pg, &omp, &level)) != 0))
|
||||||
@ -6330,11 +6339,12 @@ int mdbx_cursor_put(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
|
|||||||
/* Currently we make the page look as with put() in the
|
/* Currently we make the page look as with put() in the
|
||||||
* parent txn, in case the user peeks at MDBX_RESERVEd
|
* parent txn, in case the user peeks at MDBX_RESERVEd
|
||||||
* or unused parts. Some users treat ovpages specially. */
|
* or unused parts. Some users treat ovpages specially. */
|
||||||
size_t whole = (size_t)env->me_psize * ovpages;
|
const size_t whole = pgno2bytes(env, ovpages);
|
||||||
/* Skip the part where MDBX will put *data.
|
/* Skip the part where MDBX will put *data.
|
||||||
* Copy end of page, adjusting alignment so
|
* Copy end of page, adjusting alignment so
|
||||||
* compiler may copy words instead of bytes. */
|
* compiler may copy words instead of bytes. */
|
||||||
size_t off = (PAGEHDRSZ + data->iov_len) & -(ssize_t)sizeof(size_t);
|
const size_t off =
|
||||||
|
(PAGEHDRSZ + data->iov_len) & -(ssize_t)sizeof(size_t);
|
||||||
memcpy((size_t *)((char *)np + off), (size_t *)((char *)omp + off),
|
memcpy((size_t *)((char *)np + off), (size_t *)((char *)omp + off),
|
||||||
whole - off);
|
whole - off);
|
||||||
memcpy(np, omp, PAGEHDRSZ); /* Copy header of page */
|
memcpy(np, omp, PAGEHDRSZ); /* Copy header of page */
|
||||||
@ -6637,7 +6647,7 @@ fail:
|
|||||||
* [out] mp Address of a page, or NULL on failure.
|
* [out] mp Address of a page, or NULL on failure.
|
||||||
*
|
*
|
||||||
* Returns 0 on success, non-zero on failure. */
|
* Returns 0 on success, non-zero on failure. */
|
||||||
static int mdbx_page_new(MDBX_cursor *mc, uint32_t flags, int num,
|
static int mdbx_page_new(MDBX_cursor *mc, uint32_t flags, unsigned num,
|
||||||
MDBX_page **mp) {
|
MDBX_page **mp) {
|
||||||
MDBX_page *np;
|
MDBX_page *np;
|
||||||
int rc;
|
int rc;
|
||||||
@ -6779,7 +6789,7 @@ static int mdbx_node_add(MDBX_cursor *mc, indx_t indx, MDBX_val *key,
|
|||||||
node_size += sizeof(pgno_t);
|
node_size += sizeof(pgno_t);
|
||||||
} else if (unlikely(node_size + data->iov_len >
|
} else if (unlikely(node_size + data->iov_len >
|
||||||
mc->mc_txn->mt_env->me_nodemax)) {
|
mc->mc_txn->mt_env->me_nodemax)) {
|
||||||
int ovpages = OVPAGES(data->iov_len, mc->mc_txn->mt_env->me_psize);
|
unsigned ovpages = OVPAGES(mc->mc_txn->mt_env, data->iov_len);
|
||||||
int rc;
|
int rc;
|
||||||
/* Put data on overflow page. */
|
/* Put data on overflow page. */
|
||||||
mdbx_debug("data size is %" PRIuPTR ", node would be %" PRIuPTR
|
mdbx_debug("data size is %" PRIuPTR ", node would be %" PRIuPTR
|
||||||
@ -8646,7 +8656,7 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
|
|||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
/* Make cursor pages writable */
|
/* Make cursor pages writable */
|
||||||
buf = ptr = malloc(my->mc_env->me_psize * mc.mc_snum);
|
buf = ptr = malloc(pgno2bytes(my->mc_env, mc.mc_snum));
|
||||||
if (buf == NULL)
|
if (buf == NULL)
|
||||||
return MDBX_ENOMEM;
|
return MDBX_ENOMEM;
|
||||||
|
|
||||||
@ -8698,7 +8708,7 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
|
|||||||
my->mc_next_pgno += omp->mp_pages;
|
my->mc_next_pgno += omp->mp_pages;
|
||||||
my->mc_wlen[toggle] += my->mc_env->me_psize;
|
my->mc_wlen[toggle] += my->mc_env->me_psize;
|
||||||
if (omp->mp_pages > 1) {
|
if (omp->mp_pages > 1) {
|
||||||
my->mc_olen[toggle] = my->mc_env->me_psize * (omp->mp_pages - 1);
|
my->mc_olen[toggle] = pgno2bytes(my->mc_env, omp->mp_pages - 1);
|
||||||
my->mc_over[toggle] = (char *)omp + my->mc_env->me_psize;
|
my->mc_over[toggle] = (char *)omp + my->mc_env->me_psize;
|
||||||
rc = mdbx_env_cthr_toggle(my, 1);
|
rc = mdbx_env_cthr_toggle(my, 1);
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
@ -8843,7 +8853,7 @@ static int __cold mdbx_env_compact(MDBX_env *env, mdbx_filehandle_t fd) {
|
|||||||
/* update signature */
|
/* update signature */
|
||||||
meta->mp_meta.mm_datasync_sign = mdbx_meta_sign(&meta->mp_meta);
|
meta->mp_meta.mm_datasync_sign = mdbx_meta_sign(&meta->mp_meta);
|
||||||
|
|
||||||
my.mc_wlen[0] = env->me_psize * NUM_METAS;
|
my.mc_wlen[0] = pgno2bytes(env, NUM_METAS);
|
||||||
my.mc_txn = txn;
|
my.mc_txn = txn;
|
||||||
rc = mdbx_env_cwalk(&my, &root, 0);
|
rc = mdbx_env_cwalk(&my, &root, 0);
|
||||||
if (rc == MDBX_SUCCESS && root != new_root) {
|
if (rc == MDBX_SUCCESS && root != new_root) {
|
||||||
@ -8890,12 +8900,12 @@ static int __cold mdbx_env_copy_asis(MDBX_env *env, mdbx_filehandle_t fd) {
|
|||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = mdbx_write(fd, env->me_map, env->me_psize * NUM_METAS);
|
rc = mdbx_write(fd, env->me_map, pgno2bytes(env, NUM_METAS));
|
||||||
mdbx_txn_unlock(env);
|
mdbx_txn_unlock(env);
|
||||||
|
|
||||||
if (likely(rc == MDBX_SUCCESS))
|
if (likely(rc == MDBX_SUCCESS))
|
||||||
rc = mdbx_write(fd, env->me_map + env->me_psize * NUM_METAS,
|
rc = mdbx_write(fd, env->me_map + pgno2bytes(env, NUM_METAS),
|
||||||
(txn->mt_next_pgno - NUM_METAS) * env->me_psize);
|
pgno2bytes(env, txn->mt_next_pgno - NUM_METAS));
|
||||||
|
|
||||||
if (likely(rc == MDBX_SUCCESS))
|
if (likely(rc == MDBX_SUCCESS))
|
||||||
rc = mdbx_ftruncate(fd, env->me_mapsize);
|
rc = mdbx_ftruncate(fd, env->me_mapsize);
|
||||||
@ -9932,7 +9942,7 @@ static int __cold mdbx_env_walk(mdbx_walk_ctx_t *ctx, const char *dbi,
|
|||||||
|
|
||||||
over_header = PAGEHDRSZ;
|
over_header = PAGEHDRSZ;
|
||||||
over_payload = NODEDSZ(node);
|
over_payload = NODEDSZ(node);
|
||||||
over_unused = omp->mp_pages * ctx->mw_txn->mt_env->me_psize -
|
over_unused = pgno2bytes(ctx->mw_txn->mt_env, omp->mp_pages) -
|
||||||
over_payload - over_header;
|
over_payload - over_header;
|
||||||
|
|
||||||
rc = ctx->mw_visitor(*opg, omp->mp_pages, ctx->mw_user, dbi,
|
rc = ctx->mw_visitor(*opg, omp->mp_pages, ctx->mw_user, dbi,
|
||||||
@ -10341,7 +10351,7 @@ int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) {
|
|||||||
* во время транзакции) если адрес находится внутри mmap-диапазона
|
* во время транзакции) если адрес находится внутри mmap-диапазона
|
||||||
* и в заголовке страницы нет флажка P_DIRTY. */
|
* и в заголовке страницы нет флажка P_DIRTY. */
|
||||||
if (env->me_map < (char *)page) {
|
if (env->me_map < (char *)page) {
|
||||||
const size_t used_size = env->me_psize * txn->mt_next_pgno;
|
const size_t used_size = pgno2bytes(env, txn->mt_next_pgno);
|
||||||
if ((char *)page < env->me_map + used_size) {
|
if ((char *)page < env->me_map + used_size) {
|
||||||
/* страница внутри диапазона, смотрим на флажки */
|
/* страница внутри диапазона, смотрим на флажки */
|
||||||
return (page->mp_flags & (P_DIRTY | P_LOOSE | P_KEEP))
|
return (page->mp_flags & (P_DIRTY | P_LOOSE | P_KEEP))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user