mdbx: Merge branch 'devel'.

This commit is contained in:
Leo Yuriev 2017-07-23 21:34:29 +03:00
commit 3a53d34ce5
2 changed files with 75 additions and 30 deletions

View File

@ -9,6 +9,10 @@ environment:
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2013 - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2013
TOOLSET: v120 TOOLSET: v120
branches:
except:
- coverity_scan
configuration: configuration:
- Debug - Debug
- Release - Release

View File

@ -1,4 +1,4 @@
/* /*
* Copyright 2015-2017 Leonid Yuriev <leo@yuriev.ru> * Copyright 2015-2017 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file. * and other libmdbx authors: please see AUTHORS file.
* All rights reserved. * All rights reserved.
@ -1396,21 +1396,34 @@ static __inline uint64_t mdbx_meta_sign(const MDBX_meta *meta) {
return (sign > MDBX_DATASIGN_WEAK) ? sign : ~sign; return (sign > MDBX_DATASIGN_WEAK) ? sign : ~sign;
} }
enum meta_choise_mode { prefer_last, prefer_noweak, prefer_steady };
static __inline bool mdbx_meta_ot(const MDBX_env *env, const MDBX_meta *a, static __inline bool mdbx_meta_ot(const MDBX_env *env, const MDBX_meta *a,
const MDBX_meta *b, const MDBX_meta *b,
const bool roolback2steady) { const enum meta_choise_mode mode) {
mdbx_jitter4testing(true); mdbx_jitter4testing(true);
txnid_t txnid_a = mdbx_meta_txnid_fluid(env, a); txnid_t txnid_a = mdbx_meta_txnid_fluid(env, a);
txnid_t txnid_b = mdbx_meta_txnid_fluid(env, b); txnid_t txnid_b = mdbx_meta_txnid_fluid(env, b);
if (txnid_a == txnid_b) if (txnid_a == txnid_b)
return META_IS_STEADY(b); return META_IS_STEADY(b) || (META_IS_WEAK(a) && !META_IS_WEAK(a));
mdbx_jitter4testing(true); mdbx_jitter4testing(true);
if (roolback2steady && META_IS_STEADY(a) != META_IS_STEADY(b)) switch (mode) {
return META_IS_STEADY(b); default:
assert(false);
mdbx_jitter4testing(true); /* fall through */
return txnid_a < txnid_b; case prefer_steady:
if (META_IS_STEADY(a) != META_IS_STEADY(b))
return META_IS_STEADY(b);
/* fall through */
case prefer_noweak:
if (META_IS_WEAK(a) != META_IS_WEAK(b))
return !META_IS_WEAK(b);
/* fall through */
case prefer_last:
mdbx_jitter4testing(true);
return txnid_a < txnid_b;
}
} }
static __inline bool mdbx_meta_eq(const MDBX_env *env, const MDBX_meta *a, static __inline bool mdbx_meta_eq(const MDBX_env *env, const MDBX_meta *a,
@ -1443,37 +1456,42 @@ static int mdbx_meta_eq_mask(const MDBX_env *env) {
/* Returns the preferred one of two meta-pages under `mode`: `b` when
 * mdbx_meta_ot() reports `a` as older than `b`, otherwise `a`. */
static __inline MDBX_meta *mdbx_meta_recent(const MDBX_env *env, MDBX_meta *a,
                                            MDBX_meta *b,
                                            const enum meta_choise_mode mode) {
  /* Compare first, then assert that the two pages are not equal. */
  const bool b_is_newer = mdbx_meta_ot(env, a, b, mode);
  mdbx_assert(env, !mdbx_meta_eq(env, a, b));
  if (b_is_newer)
    return b;
  return a;
}
/* Returns the less preferred one of two meta-pages under `mode`: `a` when
 * mdbx_meta_ot() reports `a` as older than `b`, otherwise `b`. */
static __inline MDBX_meta *mdbx_meta_ancient(const MDBX_env *env, MDBX_meta *a,
                                             MDBX_meta *b,
                                             const enum meta_choise_mode mode) {
  /* Compare first, then assert that the two pages are not equal. */
  const bool a_is_older = mdbx_meta_ot(env, a, b, mode);
  mdbx_assert(env, !mdbx_meta_eq(env, a, b));
  if (a_is_older)
    return a;
  return b;
}
static __inline MDBX_meta *mdbx_meta_mostrecent(const MDBX_env *env, static __inline MDBX_meta *
const bool roolback2steady) { mdbx_meta_mostrecent(const MDBX_env *env, const enum meta_choise_mode mode) {
MDBX_meta *m0 = METAPAGE(env, 0); MDBX_meta *m0 = METAPAGE(env, 0);
MDBX_meta *m1 = METAPAGE(env, 1); MDBX_meta *m1 = METAPAGE(env, 1);
MDBX_meta *m2 = METAPAGE(env, 2); MDBX_meta *m2 = METAPAGE(env, 2);
MDBX_meta *head = mdbx_meta_recent(env, m0, m1, roolback2steady); MDBX_meta *head = mdbx_meta_recent(env, m0, m1, mode);
head = mdbx_meta_recent(env, head, m2, roolback2steady); head = mdbx_meta_recent(env, head, m2, mode);
return head; return head;
} }
/* Returns the meta-page chosen by mdbx_meta_mostrecent() with the
 * prefer_steady policy. */
static __hot MDBX_meta *mdbx_meta_steady(const MDBX_env *env) {
  MDBX_meta *const steady = mdbx_meta_mostrecent(env, prefer_steady);
  return steady;
}
static __hot MDBX_meta *mdbx_meta_head(const MDBX_env *env) { static __hot MDBX_meta *mdbx_meta_head(const MDBX_env *env) {
return mdbx_meta_mostrecent(env, false); return mdbx_meta_mostrecent(env, prefer_last);
}
static __hot txnid_t mdbx_reclaiming_detent(const MDBX_env *env) {
MDBX_meta *meta = mdbx_meta_mostrecent(env, prefer_noweak);
return mdbx_meta_txnid_stable(env, meta);
} }
static const char *mdbx_durable_str(const MDBX_meta *const meta) { static const char *mdbx_durable_str(const MDBX_meta *const meta) {
@ -1490,10 +1508,12 @@ static const char *mdbx_durable_str(const MDBX_meta *const meta) {
/* Find oldest txnid still referenced. */ /* Find oldest txnid still referenced. */
static txnid_t mdbx_find_oldest(MDBX_txn *txn) { static txnid_t mdbx_find_oldest(MDBX_txn *txn) {
mdbx_tassert(txn, (txn->mt_flags & MDBX_RDONLY) == 0); mdbx_tassert(txn, (txn->mt_flags & MDBX_RDONLY) == 0);
MDBX_lockinfo *const lck = txn->mt_env->me_lck; const MDBX_env *env = txn->mt_env;
MDBX_lockinfo *const lck = env->me_lck;
txnid_t oldest = mdbx_reclaiming_detent(env);
mdbx_tassert(txn, oldest <= txn->mt_txnid - 1);
const txnid_t last_oldest = lck->mti_oldest; const txnid_t last_oldest = lck->mti_oldest;
txnid_t oldest = txn->mt_txnid - 1;
mdbx_tassert(txn, oldest >= last_oldest); mdbx_tassert(txn, oldest >= last_oldest);
if (last_oldest == oldest || if (last_oldest == oldest ||
lck->mti_reader_finished_flag == MDBX_STRING_TETRAD("None")) lck->mti_reader_finished_flag == MDBX_STRING_TETRAD("None"))
@ -1503,7 +1523,7 @@ static txnid_t mdbx_find_oldest(MDBX_txn *txn) {
lck->mti_reader_finished_flag = MDBX_STRING_TETRAD("None"); lck->mti_reader_finished_flag = MDBX_STRING_TETRAD("None");
for (unsigned i = 0; i < snap_nreaders; ++i) { for (unsigned i = 0; i < snap_nreaders; ++i) {
if (lck->mti_readers[i].mr_pid) { if (lck->mti_readers[i].mr_pid) {
mdbx_jitter4testing(true); /* mdbx_jitter4testing(true); */
const txnid_t snap = lck->mti_readers[i].mr_txnid; const txnid_t snap = lck->mti_readers[i].mr_txnid;
if (oldest > snap && last_oldest <= /* ignore pending updates */ snap) { if (oldest > snap && last_oldest <= /* ignore pending updates */ snap) {
oldest = snap; oldest = snap;
@ -1514,6 +1534,7 @@ static txnid_t mdbx_find_oldest(MDBX_txn *txn) {
} }
if (oldest != last_oldest) { if (oldest != last_oldest) {
mdbx_notice("update oldest %" PRIaTXN " -> %" PRIaTXN, last_oldest, oldest);
mdbx_tassert(txn, oldest >= lck->mti_oldest); mdbx_tassert(txn, oldest >= lck->mti_oldest);
lck->mti_oldest = oldest; lck->mti_oldest = oldest;
} }
@ -1643,7 +1664,6 @@ static int mdbx_page_alloc(MDBX_cursor *mc, unsigned num, MDBX_page **mp,
} }
} }
const MDBX_meta *head = mdbx_meta_head(env);
pgno_t pgno, *repg_list = env->me_reclaimed_pglist; pgno_t pgno, *repg_list = env->me_reclaimed_pglist;
unsigned repg_pos = 0, repg_len = repg_list ? repg_list[0] : 0; unsigned repg_pos = 0, repg_len = repg_list ? repg_list[0] : 0;
txnid_t oldest = 0, last = 0; txnid_t oldest = 0, last = 0;
@ -1845,12 +1865,13 @@ static int mdbx_page_alloc(MDBX_cursor *mc, unsigned num, MDBX_page **mp,
goto done; goto done;
} }
const MDBX_meta *head = mdbx_meta_head(env);
if ((flags & MDBX_ALLOC_GC) && if ((flags & MDBX_ALLOC_GC) &&
((flags & MDBX_ALLOC_KICK) || rc == MDBX_MAP_FULL)) { ((flags & MDBX_ALLOC_KICK) || rc == MDBX_MAP_FULL)) {
MDBX_meta *steady = mdbx_meta_steady(env); MDBX_meta *steady = mdbx_meta_steady(env);
if (oldest == mdbx_meta_txnid_stable(env, steady) && META_IS_WEAK(head) && if (oldest == mdbx_meta_txnid_stable(env, steady) &&
!META_IS_WEAK(steady)) { !META_IS_STEADY(head) && META_IS_STEADY(steady)) {
/* LY: Here an oom was happened: /* LY: Here an oom was happened:
* - all pages had allocated; * - all pages had allocated;
* - reclaiming was stopped at the last steady-sync; * - reclaiming was stopped at the last steady-sync;
@ -2416,12 +2437,20 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
/* LY: Retry on a race, ITS#7970. */ /* LY: Retry on a race, ITS#7970. */
mdbx_compiler_barrier(); mdbx_compiler_barrier();
if (likely(meta == mdbx_meta_head(env) && if (likely(meta == mdbx_meta_head(env) &&
snap == mdbx_meta_txnid_fluid(env, meta))) { snap == mdbx_meta_txnid_fluid(env, meta) &&
snap >= env->me_oldest[0])) {
mdbx_jitter4testing(false); mdbx_jitter4testing(false);
break; break;
} }
if (env->me_lck)
env->me_lck->mti_reader_finished_flag = true;
} }
if (unlikely(txn->mt_txnid == 0)) {
mdbx_error("environment corrupted by died writer, must shutdown!");
rc = MDBX_WANNA_RECOVERY;
goto bailout;
}
mdbx_assert(env, txn->mt_txnid >= *env->me_oldest); mdbx_assert(env, txn->mt_txnid >= *env->me_oldest);
txn->mt_ro_reader = r; txn->mt_ro_reader = r;
txn->mt_dbxs = env->me_dbxs; /* mostly static anyway */ txn->mt_dbxs = env->me_dbxs; /* mostly static anyway */
@ -2478,7 +2507,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
txn->mt_dbflags[FREE_DBI] = DB_VALID; txn->mt_dbflags[FREE_DBI] = DB_VALID;
if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) { if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) {
mdbx_debug("environment had fatal error, must shutdown!"); mdbx_warning("environment had fatal error, must shutdown!");
rc = MDBX_PANIC; rc = MDBX_PANIC;
} else { } else {
const size_t size = pgno2bytes(env, txn->mt_end_pgno); const size_t size = pgno2bytes(env, txn->mt_end_pgno);
@ -3512,6 +3541,11 @@ int mdbx_txn_commit(MDBX_txn *txn) {
if (likely(rc == MDBX_SUCCESS)) { if (likely(rc == MDBX_SUCCESS)) {
MDBX_meta meta, *head = mdbx_meta_head(env); MDBX_meta meta, *head = mdbx_meta_head(env);
meta.mm_magic_and_version = head->mm_magic_and_version;
meta.mm_extra_flags = head->mm_extra_flags;
meta.mm_validator_id = head->mm_validator_id;
meta.mm_extra_pagehdr = head->mm_extra_pagehdr;
meta.mm_geo = head->mm_geo; meta.mm_geo = head->mm_geo;
meta.mm_geo.next = txn->mt_next_pgno; meta.mm_geo.next = txn->mt_next_pgno;
meta.mm_geo.now = txn->mt_end_pgno; meta.mm_geo.now = txn->mt_end_pgno;
@ -3989,7 +4023,6 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
mdbx_coherent_barrier(); mdbx_coherent_barrier();
mdbx_jitter4testing(true); mdbx_jitter4testing(true);
} else { } else {
pending->mm_magic_and_version = MDBX_DATA_MAGIC;
rc = mdbx_pwrite(env->me_fd, pending, sizeof(MDBX_meta), rc = mdbx_pwrite(env->me_fd, pending, sizeof(MDBX_meta),
(uint8_t *)target - env->me_map); (uint8_t *)target - env->me_map);
if (unlikely(rc != MDBX_SUCCESS)) { if (unlikely(rc != MDBX_SUCCESS)) {
@ -10398,10 +10431,16 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) {
int retry; int retry;
for (retry = 0; retry < INT_MAX; ++retry) { for (retry = 0; retry < INT_MAX; ++retry) {
txnid_t oldest = mdbx_reclaiming_detent(env);
mdbx_assert(env, oldest < env->me_txn0->mt_txnid);
mdbx_assert(env, oldest >= laggard);
mdbx_assert(env, oldest >= env->me_oldest[0]);
if (oldest == laggard)
return oldest;
if (MDBX_IS_ERROR(mdbx_reader_check0(env, false, NULL))) if (MDBX_IS_ERROR(mdbx_reader_check0(env, false, NULL)))
break; break;
txnid_t oldest = env->me_txn0->mt_txnid - 1;
MDBX_reader *const rtbl = env->me_lck->mti_readers; MDBX_reader *const rtbl = env->me_lck->mti_readers;
MDBX_reader *asleep = nullptr; MDBX_reader *asleep = nullptr;
for (int i = env->me_lck->mti_numreaders; --i >= 0;) { for (int i = env->me_lck->mti_numreaders; --i >= 0;) {
@ -10422,6 +10461,8 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) {
env->me_oom_func(env, 0, 0, laggard, env->me_oom_func(env, 0, 0, laggard,
(gap < UINT_MAX) ? (unsigned)gap : UINT_MAX, -retry); (gap < UINT_MAX) ? (unsigned)gap : UINT_MAX, -retry);
} }
mdbx_notice("oom-kick: update oldest %" PRIaTXN " -> %" PRIaTXN,
env->me_oldest[0], oldest);
mdbx_assert(env, env->me_oldest[0] <= oldest); mdbx_assert(env, env->me_oldest[0] <= oldest);
return env->me_oldest[0] = oldest; return env->me_oldest[0] = oldest;
} }
@ -11242,13 +11283,13 @@ int mdbx_set_attr(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data,
return rc; return rc;
} }
mdbx_attr_t old_attr; mdbx_attr_t old_attr = 0;
rc = mdbx_attr_peek(&old_data, &old_attr); rc = mdbx_attr_peek(&old_data, &old_attr);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
return rc; return rc;
if (old_attr == attr && (!data || (data->iov_len == old_data.iov_len && if (old_attr == attr && (!data || (data->iov_len == old_data.iov_len &&
memcpy(data->iov_base, old_data.iov_base, memcmp(data->iov_base, old_data.iov_base,
old_data.iov_len) == 0))) old_data.iov_len) == 0)))
return MDBX_SUCCESS; return MDBX_SUCCESS;