mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-30 22:47:16 +08:00
mdbx: использование msync(MS_ASYNC) для спиллинга в режиме MDBX_WRITEMAP вне зависимости от MDBX_AVOID_MSYNC и MDBX_MMAP_USE_MS_ASYNC.
This commit is contained in:
parent
e9a2042df1
commit
fe55f25665
196
src/core.c
196
src/core.c
@ -4179,7 +4179,6 @@ __cold static void kill_page(MDBX_txn *txn, MDBX_page *mp, pgno_t pgno,
|
|||||||
static __inline void page_wash(MDBX_txn *txn, const size_t di,
|
static __inline void page_wash(MDBX_txn *txn, const size_t di,
|
||||||
MDBX_page *const mp, const size_t npages) {
|
MDBX_page *const mp, const size_t npages) {
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
||||||
tASSERT(txn, (di > 0) == (txn->tw.dirtylist != nullptr));
|
|
||||||
mp->mp_txnid = INVALID_TXNID;
|
mp->mp_txnid = INVALID_TXNID;
|
||||||
mp->mp_flags = P_BAD;
|
mp->mp_flags = P_BAD;
|
||||||
|
|
||||||
@ -4194,10 +4193,13 @@ static __inline void page_wash(MDBX_txn *txn, const size_t di,
|
|||||||
(txn->mt_parent ? txn->mt_parent->tw.dirtyroom
|
(txn->mt_parent ? txn->mt_parent->tw.dirtyroom
|
||||||
: txn->mt_env->me_options.dp_limit));
|
: txn->mt_env->me_options.dp_limit));
|
||||||
} else {
|
} else {
|
||||||
tASSERT(txn, txn->tw.dirtylist == nullptr);
|
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP));
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
|
if (txn->tw.dirtylist == nullptr) {
|
||||||
tASSERT(txn, txn->tw.writemap_dirty_npages >= npages);
|
tASSERT(txn, !MDBX_AVOID_MSYNC);
|
||||||
txn->tw.writemap_dirty_npages -= npages;
|
txn->tw.writemap_dirty_npages -= (txn->tw.writemap_dirty_npages > npages)
|
||||||
|
? npages
|
||||||
|
: txn->tw.writemap_dirty_npages;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
VALGRIND_MAKE_MEM_UNDEFINED(mp, PAGEHDRSZ);
|
VALGRIND_MAKE_MEM_UNDEFINED(mp, PAGEHDRSZ);
|
||||||
@ -4686,14 +4688,13 @@ __must_check_result static int iov_page(MDBX_txn *txn, iov_ctx_t *ctx,
|
|||||||
|
|
||||||
static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, MDBX_page *dp,
|
static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, MDBX_page *dp,
|
||||||
const size_t npages) {
|
const size_t npages) {
|
||||||
tASSERT(txn, !(txn->mt_flags & MDBX_WRITEMAP) || MDBX_AVOID_MSYNC);
|
tASSERT(txn, !(txn->mt_flags & MDBX_WRITEMAP));
|
||||||
#if MDBX_ENABLE_PGOP_STAT
|
#if MDBX_ENABLE_PGOP_STAT
|
||||||
txn->mt_env->me_lck->mti_pgop_stat.spill.weak += npages;
|
txn->mt_env->me_lck->mti_pgop_stat.spill.weak += npages;
|
||||||
#endif /* MDBX_ENABLE_PGOP_STAT */
|
#endif /* MDBX_ENABLE_PGOP_STAT */
|
||||||
const pgno_t pgno = dp->mp_pgno;
|
const pgno_t pgno = dp->mp_pgno;
|
||||||
int err = iov_page(txn, ctx, dp, npages);
|
int err = iov_page(txn, ctx, dp, npages);
|
||||||
if (likely(err == MDBX_SUCCESS) &&
|
if (likely(err == MDBX_SUCCESS))
|
||||||
(!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)))
|
|
||||||
err = pnl_append_range(true, &txn->tw.spilled.list, pgno << 1, npages);
|
err = pnl_append_range(true, &txn->tw.spilled.list, pgno << 1, npages);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
@ -4702,7 +4703,7 @@ static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, MDBX_page *dp,
|
|||||||
* Returns the number of pages marked as unspillable. */
|
* Returns the number of pages marked as unspillable. */
|
||||||
static size_t cursor_keep(const MDBX_txn *const txn, const MDBX_cursor *mc) {
|
static size_t cursor_keep(const MDBX_txn *const txn, const MDBX_cursor *mc) {
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0);
|
||||||
size_t keep = 0;
|
size_t keep = 0;
|
||||||
while ((mc->mc_flags & C_INITIALIZED) && mc->mc_snum) {
|
while ((mc->mc_flags & C_INITIALIZED) && mc->mc_snum) {
|
||||||
tASSERT(txn, mc->mc_top == mc->mc_snum - 1);
|
tASSERT(txn, mc->mc_top == mc->mc_snum - 1);
|
||||||
@ -4736,7 +4737,8 @@ static size_t cursor_keep(const MDBX_txn *const txn, const MDBX_cursor *mc) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static size_t txn_keep(MDBX_txn *txn, MDBX_cursor *m0) {
|
static size_t txn_keep(MDBX_txn *txn, MDBX_cursor *m0) {
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
||||||
|
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0);
|
||||||
txn_lru_turn(txn);
|
txn_lru_turn(txn);
|
||||||
size_t keep = m0 ? cursor_keep(txn, m0) : 0;
|
size_t keep = m0 ? cursor_keep(txn, m0) : 0;
|
||||||
for (size_t i = FREE_DBI; i < txn->mt_numdbs; ++i)
|
for (size_t i = FREE_DBI; i < txn->mt_numdbs; ++i)
|
||||||
@ -4839,13 +4841,15 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
|
|||||||
static __inline int txn_spill(MDBX_txn *const txn, MDBX_cursor *const m0,
|
static __inline int txn_spill(MDBX_txn *const txn, MDBX_cursor *const m0,
|
||||||
const size_t need) {
|
const size_t need) {
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
|
||||||
tASSERT(txn, !m0 || cursor_is_tracked(m0));
|
tASSERT(txn, !m0 || cursor_is_tracked(m0));
|
||||||
|
|
||||||
intptr_t wanna_spill_entries = need - txn->tw.dirtyroom - txn->tw.loose_count;
|
const intptr_t wanna_spill_entries =
|
||||||
intptr_t wanna_spill_npages =
|
txn->tw.dirtylist ? (need - txn->tw.dirtyroom - txn->tw.loose_count) : 0;
|
||||||
need + txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count -
|
const intptr_t wanna_spill_npages =
|
||||||
txn->mt_env->me_options.dp_limit;
|
need +
|
||||||
|
(txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose
|
||||||
|
: txn->tw.writemap_dirty_npages) -
|
||||||
|
txn->tw.loose_count - txn->mt_env->me_options.dp_limit;
|
||||||
|
|
||||||
/* production mode */
|
/* production mode */
|
||||||
if (likely(wanna_spill_npages < 1 && wanna_spill_entries < 1)
|
if (likely(wanna_spill_npages < 1 && wanna_spill_entries < 1)
|
||||||
@ -4882,15 +4886,19 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
|
|||||||
const intptr_t wanna_spill_npages,
|
const intptr_t wanna_spill_npages,
|
||||||
const size_t need) {
|
const size_t need) {
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
|
||||||
|
|
||||||
int rc = MDBX_SUCCESS;
|
int rc = MDBX_SUCCESS;
|
||||||
if (unlikely(txn->tw.dirtylist->length <= txn->tw.loose_count))
|
if (unlikely(txn->tw.loose_count >=
|
||||||
|
(txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose
|
||||||
|
: txn->tw.writemap_dirty_npages)))
|
||||||
goto done;
|
goto done;
|
||||||
|
|
||||||
const size_t dirty_entries = txn->tw.dirtylist->length - txn->tw.loose_count;
|
const size_t dirty_entries =
|
||||||
|
txn->tw.dirtylist ? (txn->tw.dirtylist->length - txn->tw.loose_count) : 1;
|
||||||
const size_t dirty_npages =
|
const size_t dirty_npages =
|
||||||
txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count;
|
(txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose
|
||||||
|
: txn->tw.writemap_dirty_npages) -
|
||||||
|
txn->tw.loose_count;
|
||||||
const size_t need_spill_entries =
|
const size_t need_spill_entries =
|
||||||
spill_gate(txn->mt_env, wanna_spill_entries, dirty_entries);
|
spill_gate(txn->mt_env, wanna_spill_entries, dirty_entries);
|
||||||
const size_t need_spill_npages =
|
const size_t need_spill_npages =
|
||||||
@ -4902,17 +4910,18 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
|
|||||||
if (!need_spill)
|
if (!need_spill)
|
||||||
goto done;
|
goto done;
|
||||||
|
|
||||||
#if !MDBX_AVOID_MSYNC
|
|
||||||
if (txn->mt_flags & MDBX_WRITEMAP) {
|
if (txn->mt_flags & MDBX_WRITEMAP) {
|
||||||
NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "msync",
|
NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "msync",
|
||||||
dirty_entries, dirty_npages);
|
dirty_entries, dirty_npages);
|
||||||
tASSERT(txn, txn->tw.spilled.list == nullptr);
|
|
||||||
const MDBX_env *env = txn->mt_env;
|
const MDBX_env *env = txn->mt_env;
|
||||||
|
tASSERT(txn, txn->tw.spilled.list == nullptr);
|
||||||
rc =
|
rc =
|
||||||
osal_msync(&txn->mt_env->me_dxb_mmap, 0,
|
osal_msync(&txn->mt_env->me_dxb_mmap, 0,
|
||||||
pgno_align2os_bytes(env, txn->mt_next_pgno), MDBX_SYNC_KICK);
|
pgno_align2os_bytes(env, txn->mt_next_pgno), MDBX_SYNC_KICK);
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
|
#if MDBX_AVOID_MSYNC
|
||||||
|
tASSERT(txn, dirtylist_check(txn));
|
||||||
env->me_lck->mti_unsynced_pages.weak +=
|
env->me_lck->mti_unsynced_pages.weak +=
|
||||||
txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count;
|
txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count;
|
||||||
dpl_clear(txn->tw.dirtylist);
|
dpl_clear(txn->tw.dirtylist);
|
||||||
@ -4921,17 +4930,24 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
|
|||||||
rc = dpl_append(txn, lp->mp_pgno, lp, 1);
|
rc = dpl_append(txn, lp->mp_pgno, lp, 1);
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
|
MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(lp), sizeof(MDBX_page *));
|
||||||
|
VALGRIND_MAKE_MEM_DEFINED(&mp_next(lp), sizeof(MDBX_page *));
|
||||||
}
|
}
|
||||||
|
tASSERT(txn, dirtylist_check(txn));
|
||||||
|
#else
|
||||||
|
tASSERT(txn, txn->tw.dirtylist == nullptr);
|
||||||
|
env->me_lck->mti_unsynced_pages.weak += txn->tw.writemap_dirty_npages;
|
||||||
|
txn->tw.writemap_spilled_npages += txn->tw.writemap_dirty_npages;
|
||||||
|
txn->tw.writemap_dirty_npages = 0;
|
||||||
|
#endif /* MDBX_AVOID_MSYNC */
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
#endif /* MDBX_AVOID_MSYNC */
|
|
||||||
|
|
||||||
NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "write",
|
NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "write",
|
||||||
need_spill_entries, need_spill_npages);
|
need_spill_entries, need_spill_npages);
|
||||||
tASSERT(txn, txn->tw.dirtylist->length - txn->tw.loose_count >= 1);
|
tASSERT(txn, txn->tw.dirtylist->length - txn->tw.loose_count >= 1);
|
||||||
tASSERT(txn, txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count >=
|
tASSERT(txn, txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count >=
|
||||||
need_spill_npages);
|
need_spill_npages);
|
||||||
if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)) {
|
|
||||||
if (!txn->tw.spilled.list) {
|
if (!txn->tw.spilled.list) {
|
||||||
txn->tw.spilled.least_removed = INT_MAX;
|
txn->tw.spilled.least_removed = INT_MAX;
|
||||||
txn->tw.spilled.list = pnl_alloc(need_spill);
|
txn->tw.spilled.list = pnl_alloc(need_spill);
|
||||||
@ -4949,7 +4965,6 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
|
|||||||
and pnl_append() will increase pnl on demand */
|
and pnl_append() will increase pnl on demand */
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/* Сортируем чтобы запись на диск была более последовательна */
|
/* Сортируем чтобы запись на диск была более последовательна */
|
||||||
MDBX_dpl *const dl = dpl_sort(txn);
|
MDBX_dpl *const dl = dpl_sort(txn);
|
||||||
@ -5063,67 +5078,46 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
|
|||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
|
|
||||||
unsigned prev_prio = 256, prio;
|
size_t r = 0, w = 0;
|
||||||
size_t r, w;
|
pgno_t last = 0;
|
||||||
for (w = 0, r = 1;
|
while (r < dl->length && (spilled_entries < need_spill_entries ||
|
||||||
r <= dl->length && (spilled_entries < need_spill_entries ||
|
spilled_npages < need_spill_npages)) {
|
||||||
spilled_npages < need_spill_npages);
|
dl->items[++w] = dl->items[++r];
|
||||||
prev_prio = prio, ++r) {
|
unsigned prio = spill_prio(txn, w, reciprocal);
|
||||||
prio = spill_prio(txn, r, reciprocal);
|
if (prio > prio2spill &&
|
||||||
MDBX_page *const dp = dl->items[r].ptr;
|
(prio >= prio2adjacent || last != dl->items[w].pgno))
|
||||||
if (prio < prio2adjacent) {
|
continue;
|
||||||
const pgno_t pgno = dl->items[r].pgno;
|
|
||||||
const unsigned npages = dpl_npages(dl, r);
|
|
||||||
if (prio <= prio2spill) {
|
|
||||||
if (prev_prio < prio2adjacent && prev_prio > prio2spill &&
|
|
||||||
dpl_endpgno(dl, r - 1) == pgno) {
|
|
||||||
DEBUG("co-spill %u prev-adjacent page %" PRIaPGNO
|
|
||||||
" (age %d, prio %u)",
|
|
||||||
dpl_npages(dl, w), dl->items[r - 1].pgno, dpl_age(txn, r - 1),
|
|
||||||
prev_prio);
|
|
||||||
--w;
|
|
||||||
const unsigned co_npages = dpl_npages(dl, r - 1);
|
|
||||||
rc = spill_page(txn, &ctx, dl->items[r - 1].ptr, co_npages);
|
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
|
||||||
break;
|
|
||||||
++spilled_entries;
|
|
||||||
spilled_npages += co_npages;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG("spill %u page %" PRIaPGNO " (age %d, prio %u)", npages,
|
const size_t e = w;
|
||||||
dp->mp_pgno, dpl_age(txn, r), prio);
|
last = dpl_endpgno(dl, w);
|
||||||
rc = spill_page(txn, &ctx, dp, npages);
|
while (--w && dpl_endpgno(dl, w) == dl->items[w + 1].pgno &&
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
spill_prio(txn, w, reciprocal) < prio2adjacent)
|
||||||
break;
|
;
|
||||||
|
|
||||||
|
for (size_t i = w; ++i <= e;) {
|
||||||
|
const unsigned npages = dpl_npages(dl, i);
|
||||||
|
prio = spill_prio(txn, i, reciprocal);
|
||||||
|
DEBUG("%sspill[%zu] %u page %" PRIaPGNO " (age %d, prio %u)",
|
||||||
|
(prio > prio2spill) ? "co-" : "", i, npages, dl->items[i].pgno,
|
||||||
|
dpl_age(txn, i), prio);
|
||||||
|
tASSERT(txn, prio < 256);
|
||||||
++spilled_entries;
|
++spilled_entries;
|
||||||
spilled_npages += npages;
|
spilled_npages += npages;
|
||||||
continue;
|
rc = spill_page(txn, &ctx, dl->items[i].ptr, npages);
|
||||||
}
|
|
||||||
|
|
||||||
if (prev_prio <= prio2spill && dpl_endpgno(dl, r - 1) == pgno) {
|
|
||||||
DEBUG("co-spill %u next-adjacent page %" PRIaPGNO
|
|
||||||
" (age %d, prio %u)",
|
|
||||||
npages, dp->mp_pgno, dpl_age(txn, r), prio);
|
|
||||||
rc = spill_page(txn, &ctx, dp, npages);
|
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
break;
|
goto failed;
|
||||||
prio = prev_prio /* to continue co-spilling next adjacent pages */;
|
|
||||||
++spilled_entries;
|
|
||||||
spilled_npages += npages;
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
dl->items[++w] = dl->items[r];
|
|
||||||
}
|
|
||||||
|
|
||||||
VERBOSE("spilled entries %u, spilled npages %u", spilled_entries,
|
VERBOSE("spilled entries %u, spilled npages %u", spilled_entries,
|
||||||
spilled_npages);
|
spilled_npages);
|
||||||
tASSERT(txn, spillable_entries == 0 || spilled_entries > 0);
|
tASSERT(txn, spillable_entries == 0 || spilled_entries > 0);
|
||||||
tASSERT(txn, spilled_npages >= spilled_entries);
|
tASSERT(txn, spilled_npages >= spilled_entries);
|
||||||
|
|
||||||
while (r <= dl->length)
|
failed:
|
||||||
dl->items[++w] = dl->items[r++];
|
while (r < dl->length)
|
||||||
tASSERT(txn, r - 1 - w == spilled_entries);
|
dl->items[++w] = dl->items[++r];
|
||||||
|
tASSERT(txn, r - w == spilled_entries || rc != MDBX_SUCCESS);
|
||||||
|
|
||||||
dl->sorted = dpl_setlen(dl, w);
|
dl->sorted = dpl_setlen(dl, w);
|
||||||
txn->tw.dirtyroom += spilled_entries;
|
txn->tw.dirtyroom += spilled_entries;
|
||||||
@ -5138,10 +5132,8 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
|
|||||||
goto bailout;
|
goto bailout;
|
||||||
|
|
||||||
txn->mt_env->me_lck->mti_unsynced_pages.weak += spilled_npages;
|
txn->mt_env->me_lck->mti_unsynced_pages.weak += spilled_npages;
|
||||||
if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)) {
|
|
||||||
pnl_sort(txn->tw.spilled.list, (size_t)txn->mt_next_pgno << 1);
|
pnl_sort(txn->tw.spilled.list, (size_t)txn->mt_next_pgno << 1);
|
||||||
txn->mt_flags |= MDBX_TXN_SPILLS;
|
txn->mt_flags |= MDBX_TXN_SPILLS;
|
||||||
}
|
|
||||||
NOTICE("spilled %u dirty-entries, %u dirty-npages, now have %zu dirty-room",
|
NOTICE("spilled %u dirty-entries, %u dirty-npages, now have %zu dirty-room",
|
||||||
spilled_entries, spilled_npages, txn->tw.dirtyroom);
|
spilled_entries, spilled_npages, txn->tw.dirtyroom);
|
||||||
} else {
|
} else {
|
||||||
@ -5180,11 +5172,6 @@ static int cursor_spill(MDBX_cursor *mc, const MDBX_val *key,
|
|||||||
const MDBX_val *data) {
|
const MDBX_val *data) {
|
||||||
MDBX_txn *txn = mc->mc_txn;
|
MDBX_txn *txn = mc->mc_txn;
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
||||||
if (!txn->tw.dirtylist) {
|
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
|
|
||||||
return MDBX_SUCCESS;
|
|
||||||
}
|
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
|
||||||
|
|
||||||
/* Estimate how much space this operation will take: */
|
/* Estimate how much space this operation will take: */
|
||||||
/* 1) Max b-tree height, reasonable enough with including dups' sub-tree */
|
/* 1) Max b-tree height, reasonable enough with including dups' sub-tree */
|
||||||
@ -5676,16 +5663,12 @@ __hot static int __must_check_result page_dirty(MDBX_txn *txn, MDBX_page *mp,
|
|||||||
txn->tw.loose_pages = mp_next(lp);
|
txn->tw.loose_pages = mp_next(lp);
|
||||||
txn->tw.loose_count--;
|
txn->tw.loose_count--;
|
||||||
txn->tw.dirtyroom++;
|
txn->tw.dirtyroom++;
|
||||||
if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)) {
|
if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP))
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0);
|
|
||||||
dpage_free(txn->mt_env, lp, 1);
|
dpage_free(txn->mt_env, lp, 1);
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
ERROR("Dirtyroom is depleted, DPL length %zu", txn->tw.dirtylist->length);
|
ERROR("Dirtyroom is depleted, DPL length %zu", txn->tw.dirtylist->length);
|
||||||
if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)) {
|
if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP))
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0);
|
|
||||||
dpage_free(txn->mt_env, mp, npages);
|
dpage_free(txn->mt_env, mp, npages);
|
||||||
}
|
|
||||||
return MDBX_TXN_FULL;
|
return MDBX_TXN_FULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -6059,7 +6042,7 @@ __cold static int map_resize(MDBX_env *env, const pgno_t used_pgno,
|
|||||||
env->me_lck->mti_pgop_stat.msync.weak += 1;
|
env->me_lck->mti_pgop_stat.msync.weak += 1;
|
||||||
#endif /* MDBX_ENABLE_PGOP_STAT */
|
#endif /* MDBX_ENABLE_PGOP_STAT */
|
||||||
rc = osal_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, used_pgno),
|
rc = osal_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, used_pgno),
|
||||||
MDBX_SYNC_KICK);
|
MDBX_SYNC_NONE);
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
@ -7834,6 +7817,20 @@ __hot static int page_touch(MDBX_cursor *mc) {
|
|||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
||||||
const size_t n = dpl_search(txn, mp->mp_pgno);
|
const size_t n = dpl_search(txn, mp->mp_pgno);
|
||||||
|
if (MDBX_AVOID_MSYNC &&
|
||||||
|
unlikely(txn->tw.dirtylist->items[n].pgno != mp->mp_pgno)) {
|
||||||
|
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP));
|
||||||
|
tASSERT(txn, n > 0 && n <= txn->tw.dirtylist->length + 1);
|
||||||
|
VERBOSE("unspill page %" PRIaPGNO, mp->mp_pgno);
|
||||||
|
np = (MDBX_page *)mp;
|
||||||
|
#if MDBX_ENABLE_PGOP_STAT
|
||||||
|
txn->mt_env->me_lck->mti_pgop_stat.unspill.weak += 1;
|
||||||
|
#endif /* MDBX_ENABLE_PGOP_STAT */
|
||||||
|
return page_dirty(txn, np, 1);
|
||||||
|
}
|
||||||
|
tASSERT(txn, n > 0 && n <= txn->tw.dirtylist->length);
|
||||||
|
tASSERT(txn, txn->tw.dirtylist->items[n].pgno == mp->mp_pgno &&
|
||||||
|
txn->tw.dirtylist->items[n].ptr == mp);
|
||||||
txn->tw.dirtylist->items[n].mlru =
|
txn->tw.dirtylist->items[n].mlru =
|
||||||
(txn->tw.dirtylist->items[n].mlru & MDBX_dp_multi_mask) +
|
(txn->tw.dirtylist->items[n].mlru & MDBX_dp_multi_mask) +
|
||||||
txn_lru_turn(txn);
|
txn_lru_turn(txn);
|
||||||
@ -8883,6 +8880,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
|
|||||||
txn->tw.dirtyroom = MAX_PAGENO;
|
txn->tw.dirtyroom = MAX_PAGENO;
|
||||||
txn->tw.dirtylru = 0;
|
txn->tw.dirtylru = 0;
|
||||||
}
|
}
|
||||||
|
eASSERT(env, txn->tw.writemap_dirty_npages == 0);
|
||||||
|
eASSERT(env, txn->tw.writemap_spilled_npages == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Setup db info */
|
/* Setup db info */
|
||||||
@ -9352,7 +9351,8 @@ int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) {
|
|||||||
info->txn_space_leftover = pgno2bytes(env, txn->tw.dirtyroom);
|
info->txn_space_leftover = pgno2bytes(env, txn->tw.dirtyroom);
|
||||||
info->txn_space_dirty = pgno2bytes(
|
info->txn_space_dirty = pgno2bytes(
|
||||||
env, txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose
|
env, txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose
|
||||||
: txn->tw.writemap_dirty_npages);
|
: (txn->tw.writemap_dirty_npages +
|
||||||
|
txn->tw.writemap_spilled_npages));
|
||||||
info->txn_reader_lag = INT64_MAX;
|
info->txn_reader_lag = INT64_MAX;
|
||||||
MDBX_lockinfo *const lck = env->me_lck_mmap.lck;
|
MDBX_lockinfo *const lck = env->me_lck_mmap.lck;
|
||||||
if (scan_rlt && lck) {
|
if (scan_rlt && lck) {
|
||||||
@ -9566,10 +9566,8 @@ static void dpl_sift(MDBX_txn *const txn, MDBX_PNL pl, const bool spilled) {
|
|||||||
remove_dl:
|
remove_dl:
|
||||||
npages = dpl_npages(dl, r);
|
npages = dpl_npages(dl, r);
|
||||||
dl->pages_including_loose -= npages;
|
dl->pages_including_loose -= npages;
|
||||||
if (!MDBX_AVOID_MSYNC || !(txn->mt_env->me_flags & MDBX_WRITEMAP)) {
|
if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP))
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0);
|
|
||||||
dpage_free(txn->mt_env, dl->items[r].ptr, npages);
|
dpage_free(txn->mt_env, dl->items[r].ptr, npages);
|
||||||
}
|
|
||||||
++r;
|
++r;
|
||||||
next_i:
|
next_i:
|
||||||
i += step;
|
i += step;
|
||||||
@ -12410,7 +12408,7 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending,
|
|||||||
rc = MDBX_RESULT_FALSE /* carry steady */;
|
rc = MDBX_RESULT_FALSE /* carry steady */;
|
||||||
if (atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed)) {
|
if (atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed)) {
|
||||||
eASSERT(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0);
|
eASSERT(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0);
|
||||||
enum osal_syncmode_bits mode_bits = MDBX_SYNC_KICK;
|
enum osal_syncmode_bits mode_bits = MDBX_SYNC_NONE;
|
||||||
unsigned sync_op = 0;
|
unsigned sync_op = 0;
|
||||||
if ((flags & MDBX_SAFE_NOSYNC) == 0) {
|
if ((flags & MDBX_SAFE_NOSYNC) == 0) {
|
||||||
sync_op = 1;
|
sync_op = 1;
|
||||||
@ -12422,7 +12420,7 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending,
|
|||||||
mode_bits |= MDBX_SYNC_IODQ;
|
mode_bits |= MDBX_SYNC_IODQ;
|
||||||
} else if (unlikely(env->me_incore))
|
} else if (unlikely(env->me_incore))
|
||||||
goto skip_incore_sync;
|
goto skip_incore_sync;
|
||||||
if (!MDBX_AVOID_MSYNC && (flags & MDBX_WRITEMAP)) {
|
if (flags & MDBX_WRITEMAP) {
|
||||||
#if MDBX_ENABLE_PGOP_STAT
|
#if MDBX_ENABLE_PGOP_STAT
|
||||||
env->me_lck->mti_pgop_stat.msync.weak += sync_op;
|
env->me_lck->mti_pgop_stat.msync.weak += sync_op;
|
||||||
#else
|
#else
|
||||||
@ -12567,7 +12565,7 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending,
|
|||||||
#endif /* MDBX_ENABLE_PGOP_STAT */
|
#endif /* MDBX_ENABLE_PGOP_STAT */
|
||||||
rc = osal_msync(
|
rc = osal_msync(
|
||||||
&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS),
|
&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS),
|
||||||
(flags & MDBX_NOMETASYNC) ? MDBX_SYNC_KICK
|
(flags & MDBX_NOMETASYNC) ? MDBX_SYNC_NONE
|
||||||
: MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
|
: MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
|
||||||
} else {
|
} else {
|
||||||
#if MDBX_ENABLE_PGOP_STAT
|
#if MDBX_ENABLE_PGOP_STAT
|
||||||
@ -13995,14 +13993,10 @@ __cold static int setup_lck(MDBX_env *env, pathchar_t *lck_pathname,
|
|||||||
#if MDBX_ENABLE_PGOP_STAT
|
#if MDBX_ENABLE_PGOP_STAT
|
||||||
lck->mti_pgop_stat.wops.weak = 1;
|
lck->mti_pgop_stat.wops.weak = 1;
|
||||||
#endif /* MDBX_ENABLE_PGOP_STAT */
|
#endif /* MDBX_ENABLE_PGOP_STAT */
|
||||||
err = osal_msync(&env->me_lck_mmap, 0, (size_t)size, MDBX_SYNC_KICK);
|
err = osal_msync(&env->me_lck_mmap, 0, (size_t)size,
|
||||||
|
MDBX_SYNC_DATA | MDBX_SYNC_SIZE);
|
||||||
if (unlikely(err != MDBX_SUCCESS)) {
|
if (unlikely(err != MDBX_SUCCESS)) {
|
||||||
ERROR("initial-%s for lck-file failed", "msync");
|
ERROR("initial-%s for lck-file failed, err %d", "msync/fsync", err);
|
||||||
goto bailout;
|
|
||||||
}
|
|
||||||
err = osal_fsync(env->me_lck_mmap.fd, MDBX_SYNC_SIZE);
|
|
||||||
if (unlikely(err != MDBX_SUCCESS)) {
|
|
||||||
ERROR("initial-%s for lck-file failed", "fsync");
|
|
||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -1110,6 +1110,7 @@ struct MDBX_txn {
|
|||||||
MDBX_PNL list;
|
MDBX_PNL list;
|
||||||
} spilled;
|
} spilled;
|
||||||
size_t writemap_dirty_npages;
|
size_t writemap_dirty_npages;
|
||||||
|
size_t writemap_spilled_npages;
|
||||||
};
|
};
|
||||||
} tw;
|
} tw;
|
||||||
};
|
};
|
||||||
|
@ -1566,6 +1566,7 @@ MDBX_INTERNAL_FUNC int osal_fsync(mdbx_filehandle_t fd,
|
|||||||
* see http://www.spinics.net/lists/linux-ext4/msg33714.html */
|
* see http://www.spinics.net/lists/linux-ext4/msg33714.html */
|
||||||
while (1) {
|
while (1) {
|
||||||
switch (mode_bits & (MDBX_SYNC_DATA | MDBX_SYNC_SIZE)) {
|
switch (mode_bits & (MDBX_SYNC_DATA | MDBX_SYNC_SIZE)) {
|
||||||
|
case MDBX_SYNC_NONE:
|
||||||
case MDBX_SYNC_KICK:
|
case MDBX_SYNC_KICK:
|
||||||
return MDBX_SUCCESS /* nothing to do */;
|
return MDBX_SUCCESS /* nothing to do */;
|
||||||
#if defined(_POSIX_SYNCHRONIZED_IO) && _POSIX_SYNCHRONIZED_IO > 0
|
#if defined(_POSIX_SYNCHRONIZED_IO) && _POSIX_SYNCHRONIZED_IO > 0
|
||||||
@ -1707,7 +1708,7 @@ MDBX_INTERNAL_FUNC int osal_thread_join(osal_thread_t thread) {
|
|||||||
MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset,
|
MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset,
|
||||||
size_t length,
|
size_t length,
|
||||||
enum osal_syncmode_bits mode_bits) {
|
enum osal_syncmode_bits mode_bits) {
|
||||||
if (!MDBX_MMAP_USE_MS_ASYNC && mode_bits == MDBX_SYNC_KICK)
|
if (!MDBX_MMAP_USE_MS_ASYNC && mode_bits == MDBX_SYNC_NONE)
|
||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
|
|
||||||
void *ptr = ptr_disp(map->base, offset);
|
void *ptr = ptr_disp(map->base, offset);
|
||||||
@ -1727,7 +1728,7 @@ MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset,
|
|||||||
// NOTE: The MDBX_MMAP_USE_MS_ASYNC must be defined to 1 for such cases.
|
// NOTE: The MDBX_MMAP_USE_MS_ASYNC must be defined to 1 for such cases.
|
||||||
//
|
//
|
||||||
// assert(linux_kernel_version > 0x02061300);
|
// assert(linux_kernel_version > 0x02061300);
|
||||||
// if (mode_bits == MDBX_SYNC_KICK)
|
// if (mode_bits <= MDBX_SYNC_KICK)
|
||||||
// return MDBX_SUCCESS;
|
// return MDBX_SUCCESS;
|
||||||
#endif /* Linux */
|
#endif /* Linux */
|
||||||
if (msync(ptr, length, (mode_bits & MDBX_SYNC_DATA) ? MS_SYNC : MS_ASYNC))
|
if (msync(ptr, length, (mode_bits & MDBX_SYNC_DATA) ? MS_SYNC : MS_ASYNC))
|
||||||
|
@ -523,10 +523,11 @@ osal_thread_create(osal_thread_t *thread,
|
|||||||
MDBX_INTERNAL_FUNC int osal_thread_join(osal_thread_t thread);
|
MDBX_INTERNAL_FUNC int osal_thread_join(osal_thread_t thread);
|
||||||
|
|
||||||
enum osal_syncmode_bits {
|
enum osal_syncmode_bits {
|
||||||
MDBX_SYNC_KICK = 0,
|
MDBX_SYNC_NONE = 0,
|
||||||
MDBX_SYNC_DATA = 1,
|
MDBX_SYNC_KICK = 1,
|
||||||
MDBX_SYNC_SIZE = 2,
|
MDBX_SYNC_DATA = 2,
|
||||||
MDBX_SYNC_IODQ = 4
|
MDBX_SYNC_SIZE = 4,
|
||||||
|
MDBX_SYNC_IODQ = 8
|
||||||
};
|
};
|
||||||
|
|
||||||
MDBX_INTERNAL_FUNC int osal_fsync(mdbx_filehandle_t fd,
|
MDBX_INTERNAL_FUNC int osal_fsync(mdbx_filehandle_t fd,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user