From 39c1238d8ee3db4eaa07e87050ad0316ae9b6a20 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Thu, 10 Oct 2019 22:11:28 +0300 Subject: [PATCH] mdbx: refine Valgrind support (i.e. avoid false-positives). Change-Id: I688b2e905d0b705c34ab29df29bfd0a9bcdde8c9 --- GNUmakefile | 8 +++ src/elements/core.c | 120 +++++++++++++++++++++++++++++++++---- src/elements/defs.h | 4 +- src/elements/internals.h | 5 +- test/long_stochastic.sh | 19 +++++- test/valgrind_suppress.txt | 16 +++++ 6 files changed, 154 insertions(+), 18 deletions(-) create mode 100644 test/valgrind_suppress.txt diff --git a/GNUmakefile b/GNUmakefile index e680e8f3..e8dc33ae 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -149,6 +149,14 @@ check-fault: all mdbx_test rm -f $(TEST_DB) $(TEST_LOG) && (set -o pipefail; ./mdbx_test --progress --console=no --pathname=$(TEST_DB) --inject-writefault=42 --dump-config --dont-cleanup-after basic | tee -a $(TEST_LOG) | tail -n 42) \ ; ./mdbx_chk -vvnw $(TEST_DB) && ([ ! -e $(TEST_DB)-copy ] || ./mdbx_chk -vvn $(TEST_DB)-copy) +VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt +memcheck check-valgrind: all mdbx_test + rm -f valgrind-*.log $(TEST_DB) $(TEST_LOG) && (set -o pipefail; \ + $(VALGRIND) ./mdbx_test --progress --console=no --repeat=4 --pathname=$(TEST_DB) --dont-cleanup-after --hill && \ + $(VALGRIND) ./mdbx_test --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-before --dont-cleanup-after --copy \ + | tee -a $(TEST_LOG) | tail -n 42) \ + && $(VALGRIND) ./mdbx_chk -vvn $(TEST_DB) && ./mdbx_chk -vvn $(TEST_DB)-copy + define test-rule $(patsubst %.cc,%.o,$(1)): $(1) $(TEST_INC) mdbx.h $(lastword $(MAKEFILE_LIST)) $(CXX) $(CXXFLAGS) $(MDBX_OPTIONS) -c $(1) -o $$@ diff --git a/src/elements/core.c b/src/elements/core.c index 750984bd..6d27f4e9 100644 --- a/src/elements/core.c +++ b/src/elements/core.c @@ -2211,12 +2211,12 @@ static __must_check_result int mdbx_page_loose(MDBX_cursor *mc, MDBX_page *mp) { if (loose) { mdbx_debug("loosen db %d page %" PRIaPGNO, DDBI(mc), mp->mp_pgno); + VALGRIND_MAKE_MEM_UNDEFINED(mp, txn->mt_env->me_psize); + VALGRIND_MAKE_MEM_DEFINED(&mp->mp_pgno, sizeof(mp->mp_pgno)); if (unlikely(txn->mt_env->me_flags & MDBX_PAGEPERTURB)) mdbx_kill_page(txn->mt_env, mp); MDBX_page **link = &NEXT_LOOSE_PAGE(mp); mp->mp_flags = P_LOOSE | P_DIRTY; - VALGRIND_MAKE_MEM_UNDEFINED(mp, PAGEHDRSZ); - ASAN_UNPOISON_MEMORY_REGION(link, sizeof(*link)); *link = txn->tw.loose_pages; txn->tw.loose_pages = mp; txn->tw.loose_count++; @@ -2721,7 +2721,7 @@ static int __must_check_result mdbx_page_dirty(MDBX_txn *txn, MDBX_page *mp) { __cold static int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno, const pgno_t limit_pgno) { -#ifdef USE_VALGRIND +#ifdef MDBX_USE_VALGRIND const size_t prev_mapsize = env->me_mapsize; void *const prev_mapaddr = env->me_map; #endif @@ -2792,7 +2792,7 @@ bailout: mdbx_tassert(env->me_txn, size_pgno >= env->me_txn->mt_next_pgno); env->me_txn->mt_end_pgno = env->me_txn0->mt_end_pgno = size_pgno; } -#ifdef USE_VALGRIND +#ifdef MDBX_USE_VALGRIND if (prev_mapsize != env->me_mapsize || prev_mapaddr != env->me_map) { VALGRIND_DISCARD(env->me_valgrind_handle); env->me_valgrind_handle = 0; @@ -3664,6 +3664,77 @@ static void mdbx_cursors_eot(MDBX_txn *txn, unsigned merge) { } } +#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +/* Find largest mvcc-snapshot still referenced by this process. */ +static pgno_t mdbx_find_largest_this(MDBX_env *env, pgno_t largest) { + MDBX_lockinfo *const lck = env->me_lck; + if (likely(lck != NULL /* exclusive mode */)) { + const unsigned snap_nreaders = lck->mti_numreaders; + for (unsigned i = 0; i < snap_nreaders; ++i) { + retry: + if (lck->mti_readers[i].mr_pid == env->me_pid) { + /* mdbx_jitter4testing(true); */ + const pgno_t snap_pages = lck->mti_readers[i].mr_snapshot_pages_used; + const txnid_t snap_txnid = safe64_read(&lck->mti_readers[i].mr_txnid); + mdbx_memory_barrier(); + if (unlikely(snap_pages != lck->mti_readers[i].mr_snapshot_pages_used || + snap_txnid != safe64_read(&lck->mti_readers[i].mr_txnid))) + goto retry; + if (largest < snap_pages && + lck->mti_oldest_reader <= /* ignore pending updates */ snap_txnid && + snap_txnid <= env->me_txn0->mt_txnid) + largest = snap_pages; + } + } + } + return largest; +} + +static void mdbx_txn_valgrind(MDBX_env *env, MDBX_txn *txn) { +#if !defined(__SANITIZE_ADDRESS__) + if (!RUNNING_ON_VALGRIND) + return; +#endif + + if (txn) { /* transaction start */ + if (env->me_poison_edge < txn->mt_next_pgno) + env->me_poison_edge = txn->mt_next_pgno; + VALGRIND_MAKE_MEM_DEFINED(env->me_map, pgno2bytes(env, txn->mt_next_pgno)); + ASAN_UNPOISON_MEMORY_REGION(env->me_map, + pgno2bytes(env, txn->mt_next_pgno)); + /* don't touch more, it should be already poisoned */ + } else { /* transaction end */ + bool should_unlock = false; + pgno_t last = MAX_PAGENO; + if (env->me_txn0 && env->me_txn0->mt_owner == mdbx_thread_self()) { + /* inside write-txn */ + MDBX_meta *head = mdbx_meta_head(env); + last = head->mm_geo.next; + } else if (mdbx_txn_lock(env, true) == MDBX_SUCCESS) { + /* no write-txn */ + last = NUM_METAS; + should_unlock = true; + } else { + /* write txn is running, therefore shouldn't poison any memory range */ + return; + } + + last = mdbx_find_largest_this(env, last); + const pgno_t edge = env->me_poison_edge ? env->me_poison_edge + : bytes2pgno(env, env->me_mapsize); + if (edge > last) { + env->me_poison_edge = last; + VALGRIND_MAKE_MEM_NOACCESS(env->me_map + pgno2bytes(env, last), + pgno2bytes(env, edge - last)); + ASAN_POISON_MEMORY_REGION(env->me_map + pgno2bytes(env, last), + pgno2bytes(env, edge - last)); + } + if (should_unlock) + mdbx_txn_unlock(env); + } +} +#endif /* MDBX_USE_VALGRIND */ + /* Common code for mdbx_txn_begin() and mdbx_txn_renew(). */ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) { MDBX_env *env = txn->mt_env; @@ -3912,6 +3983,9 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) { txn->mt_flags |= MDBX_SHRINK_ALLOWED; mdbx_srwlock_AcquireShared(&env->me_remap_guard); } +#endif +#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) + mdbx_txn_valgrind(env, txn); #endif return MDBX_SUCCESS; } @@ -4338,11 +4412,8 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) { mdbx_ensure(env, txn->mt_txnid >= /* paranoia is appropriate here */ *env->me_oldest); + if (F_ISSET(txn->mt_flags, MDBX_RDONLY)) { -#if defined(_WIN32) || defined(_WIN64) - if (txn->mt_flags & MDBX_SHRINK_ALLOWED) - mdbx_srwlock_ReleaseShared(&env->me_remap_guard); -#endif if (txn->to.reader) { mdbx_ensure(env, /* paranoia is appropriate here */ txn->mt_txnid == txn->to.reader->mr_txnid.inconsistent && @@ -4358,13 +4429,23 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) { env->me_lck->mti_readers_refresh_flag = true; mdbx_flush_noncoherent_cpu_writeback(); } +#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) + mdbx_txn_valgrind(env, nullptr); +#endif +#if defined(_WIN32) || defined(_WIN64) + if (txn->mt_flags & MDBX_SHRINK_ALLOWED) + mdbx_srwlock_ReleaseShared(&env->me_remap_guard); +#endif txn->mt_numdbs = 0; /* prevent further DBI activity */ txn->mt_flags = MDBX_RDONLY | MDBX_TXN_FINISHED; txn->mt_owner = 0; } else if (!F_ISSET(txn->mt_flags, MDBX_TXN_FINISHED)) { +#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) + if (txn == env->me_txn0) + mdbx_txn_valgrind(env, nullptr); +#endif /* Export or close DBI handles created in this txn */ mdbx_dbis_update(txn, mode & MDBX_END_UPDATE); - if (!(mode & MDBX_END_EOTDONE)) /* !(already closed cursors) */ mdbx_cursors_eot(txn, 0); if (!(env->me_flags & MDBX_WRITEMAP)) @@ -6074,6 +6155,10 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags, const pgno_t largest_pgno = mdbx_find_largest( env, (head->mm_geo.next > pending->mm_geo.next) ? head->mm_geo.next : pending->mm_geo.next); + VALGRIND_MAKE_MEM_NOACCESS(env->me_map + pgno2bytes(env, largest_pgno), + env->me_mapsize - pgno2bytes(env, largest_pgno)); + ASAN_POISON_MEMORY_REGION(env->me_map + pgno2bytes(env, largest_pgno), + env->me_mapsize - pgno2bytes(env, largest_pgno)); const size_t largest_aligned2os_bytes = pgno_align2os_bytes(env, largest_pgno); const pgno_t largest_aligned2os_pgno = @@ -6420,11 +6505,16 @@ bailout: static int __cold mdbx_env_map(MDBX_env *env, const int is_exclusive, const size_t usedsize) { + mdbx_assert(env, usedsize >= pgno2bytes(env, NUM_METAS)); int rc = mdbx_mmap(env->me_flags, &env->me_dxb_mmap, env->me_dbgeo.now, env->me_dbgeo.upper); if (unlikely(rc != MDBX_SUCCESS)) return rc; + VALGRIND_MAKE_MEM_NOACCESS(env->me_map + usedsize, + env->me_mapsize - usedsize); + ASAN_POISON_MEMORY_REGION(env->me_map + usedsize, env->me_mapsize - usedsize); + #ifdef MADV_DONTFORK if (unlikely(madvise(env->me_map, env->me_mapsize, MADV_DONTFORK) != 0)) return errno; @@ -6519,7 +6609,7 @@ static int __cold mdbx_env_map(MDBX_env *env, const int is_exclusive, #endif /* Windows */ } -#ifdef USE_VALGRIND +#ifdef MDBX_USE_VALGRIND env->me_valgrind_handle = VALGRIND_CREATE_BLOCK(env->me_map, env->me_mapsize, "mdbx"); #endif @@ -7051,7 +7141,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) { } } - err = mdbx_env_map(env, lck_rc /* exclusive status */, expected_bytes); + err = mdbx_env_map(env, lck_rc /* exclusive status */, used_bytes); if (err != MDBX_SUCCESS) return err; @@ -7652,7 +7742,7 @@ static int __cold mdbx_env_close0(MDBX_env *env) { if (env->me_map) { mdbx_munmap(&env->me_dxb_mmap); -#ifdef USE_VALGRIND +#ifdef MDBX_USE_VALGRIND VALGRIND_DISCARD(env->me_valgrind_handle); env->me_valgrind_handle = -1; #endif @@ -15670,6 +15760,12 @@ __dll_export " MDBX_TXN_CHECKPID=" STRINGIFY(MDBX_TXN_CHECKPID) " MDBX_TXN_CHECKOWNER=" STRINGIFY(MDBX_TXN_CHECKOWNER) " MDBX_64BIT_ATOMIC=" STRINGIFY(MDBX_64BIT_ATOMIC) +#ifdef __SANITIZE_ADDRESS__ + " SANITIZE_ADDRESS=YES" +#endif /* __SANITIZE_ADDRESS__ */ +#ifdef MDBX_USE_VALGRIND + " MDBX_USE_VALGRIND=YES" +#endif /* MDBX_USE_VALGRIND */ #ifdef _GNU_SOURCE " _GNU_SOURCE=YES" #else diff --git a/src/elements/defs.h b/src/elements/defs.h index 83c32831..f70c3d77 100644 --- a/src/elements/defs.h +++ b/src/elements/defs.h @@ -363,7 +363,7 @@ typedef _Complex float __cfloat128 __attribute__ ((__mode__ (__TC__))); /*----------------------------------------------------------------------------*/ -#if defined(USE_VALGRIND) +#if defined(MDBX_USE_VALGRIND) # include # ifndef VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE /* LY: available since Valgrind 3.10 */ @@ -385,7 +385,7 @@ typedef _Complex float __cfloat128 __attribute__ ((__mode__ (__TC__))); # define VALGRIND_CHECK_MEM_IS_ADDRESSABLE(a,s) (0) # define VALGRIND_CHECK_MEM_IS_DEFINED(a,s) (0) # define RUNNING_ON_VALGRIND (0) -#endif /* USE_VALGRIND */ +#endif /* MDBX_USE_VALGRIND */ #ifdef __SANITIZE_ADDRESS__ # include diff --git a/src/elements/internals.h b/src/elements/internals.h index c7e5e645..ed2e1fca 100644 --- a/src/elements/internals.h +++ b/src/elements/internals.h @@ -965,8 +965,11 @@ struct MDBX_env { #if MDBX_DEBUG MDBX_assert_func *me_assert_func; /* Callback for assertion failures */ #endif -#ifdef USE_VALGRIND +#ifdef MDBX_USE_VALGRIND int me_valgrind_handle; +#endif +#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) + pgno_t me_poison_edge; #endif MDBX_env *me_lcklist_next; diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index f606854a..7b9f90fa 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -8,9 +8,22 @@ set -euo pipefail UNAME="$(uname -s 2>/dev/null || echo Unknown)" +## NOTE: Valgrind could produce some false-positive warnings +## in multi-process environment with shared memory. +## For instance, when the process "A" explicitly marks a memory +## region as "undefined", the process "B" fill it, +## and after this process "A" read such region, etc. +#VALGRIND="valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt" + ############################################################################### # 1. clean data from prev runs and examine available RAM +if [[ -v VALGRIND && ! -z "$VALGRIND" ]]; then + rm -f valgrind-*.log +else + VALGRIND=time +fi + WANNA_MOUNT=0 case ${UNAME} in Linux) @@ -164,9 +177,9 @@ function probe { echo "=============================================== $(date)" echo "${caption}: $*" rm -f ${TESTDB_DIR}/* \ - && ./mdbx_test --ignore-dbfull --repeat=42 --pathname=${TESTDB_DIR}/long.db "$@" | lz4 > ${TESTDB_DIR}/long.log.lz4 \ - && ./mdbx_chk -nvvv ${TESTDB_DIR}/long.db | tee ${TESTDB_DIR}/long-chk.log \ - && ([ ! -e ${TESTDB_DIR}/long.db-copy ] || ./mdbx_chk -nvvv ${TESTDB_DIR}/long.db-copy | tee ${TESTDB_DIR}/long-chk-copy.log) \ + && ${VALGRIND} ./mdbx_test --ignore-dbfull --repeat=42 --pathname=${TESTDB_DIR}/long.db "$@" | lz4 > ${TESTDB_DIR}/long.log.lz4 \ + && ${VALGRIND} ./mdbx_chk -nvvv ${TESTDB_DIR}/long.db | tee ${TESTDB_DIR}/long-chk.log \ + && ([ ! -e ${TESTDB_DIR}/long.db-copy ] || ${VALGRIND} ./mdbx_chk -nvvv ${TESTDB_DIR}/long.db-copy | tee ${TESTDB_DIR}/long-chk-copy.log) \ || (echo "FAILED"; exit 1) } diff --git a/test/valgrind_suppress.txt b/test/valgrind_suppress.txt new file mode 100644 index 00000000..84c4746f --- /dev/null +++ b/test/valgrind_suppress.txt @@ -0,0 +1,16 @@ +{ + msync-whole-mmap-1 + Memcheck:Param + msync(start) + fun:msync + ... + fun:mdbx_sync_locked +} +{ + msync-whole-mmap-2 + Memcheck:Param + msync(start) + fun:msync + ... + fun:mdbx_env_sync_ex +}