From bfc6900b077ee88df8a824d0e828c212b3b6a997 Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Sat, 7 May 2016 03:22:42 +0300 Subject: [PATCH 1/5] mdbx: reporting 'Free pages' -> 'Unallocated'. Change-Id: I31ef11250139e3ec97240610b40b5f0ec7deaa6c --- mdb_stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdb_stat.c b/mdb_stat.c index de5725bf..b3533fdd 100644 --- a/mdb_stat.c +++ b/mdb_stat.c @@ -221,7 +221,7 @@ int main(int argc, char *argv[]) printf(" Used now: %zu %.1f%%\n", value, value / percent); value = pages; - printf(" Free pages: %zu %.1f%%\n", value, value / percent); + printf(" Unallocated: %zu %.1f%%\n", value, value / percent); value = pages - reclaimable; printf(" Detained: %zu %.1f%%\n", value, value / percent); From 14b3afdcffcd9b9815b00f2bc3bf7d83097cd1c3 Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Sat, 7 May 2016 03:43:36 +0300 Subject: [PATCH 2/5] mdbx: notify OOM-handler about end of loop. Change-Id: I71d66e371df869560801e12b8b06c4f4cbf90e98 --- lmdb.h | 4 +++- mdb.c | 60 ++++++++++++++++++++++++++++++++-------------------------- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/lmdb.h b/lmdb.h index acff2609..db57d5ed 100644 --- a/lmdb.h +++ b/lmdb.h @@ -1671,12 +1671,14 @@ int mdb_reader_check(MDB_env *env, int *dead); int mdbx_txn_straggler(MDB_txn *txn, int *percent); /** @brief A callback function for killing a laggard readers, - * called in case of MDB_MAP_FULL error. + * but it could also wait for them. Called in case of MDB_MAP_FULL error. * * @param[in] env An environment handle returned by #mdb_env_create(). * @param[in] pid pid of the reader process. * @param[in] thread_id thread_id of the reader thread. * @param[in] txn Transaction number on which stalled. + * @param[in] gap the lag from the last committed txn. + * @param[in] retry a retry number, less than zero to notify the end of the OOM-loop. 
* @return -1 on failure (reader is not killed), * 0 on a race condition (no such reader), * 1 on success (reader was killed), diff --git a/mdb.c b/mdb.c index 9e0c902b..6744ac76 100644 --- a/mdb.c +++ b/mdb.c @@ -1990,43 +1990,49 @@ mdbx_oomkick(MDB_env *env, txnid_t oldest) break; snap = mdb_find_oldest(env, &reader); - if (oldest < snap) + if (oldest < snap || reader < 0) { + if (retry && env->me_oom_func) { + /* LY: notify end of oom-loop */ + env->me_oom_func(env, 0, 0, oldest, snap - oldest, -retry); + } return snap; + } - if (reader < 0) - return 0; + MDB_reader *r; + pthread_t tid; + pid_t pid; + int rc; - { - MDB_reader *r; - pthread_t tid; - pid_t pid; - int rc; + if (!env->me_oom_func) + break; - if (!env->me_oom_func) - break; + r = &env->me_txns->mti_readers[ reader ]; + pid = r->mr_pid; + tid = r->mr_tid; + if (r->mr_txnid != oldest || pid <= 0) + continue; - r = &env->me_txns->mti_readers[ reader ]; - pid = r->mr_pid; - tid = r->mr_tid; - if (r->mr_txnid != oldest || pid <= 0) - continue; + rc = env->me_oom_func(env, pid, (void*) tid, oldest, + mdb_meta_head_w(env)->mm_txnid - oldest, retry); + if (rc < 0) + break; - rc = env->me_oom_func(env, pid, (void*) tid, oldest, - mdb_meta_head_w(env)->mm_txnid - oldest, retry); - if (rc < 0) - break; - - if (rc) { - r->mr_txnid = ~(txnid_t)0; - if (rc > 1) { - r->mr_tid = 0; - r->mr_pid = 0; - mdbx_coherent_barrier(); - } + if (rc) { + r->mr_txnid = ~(txnid_t)0; + if (rc > 1) { + r->mr_tid = 0; + r->mr_pid = 0; + mdbx_coherent_barrier(); } } } + + if (retry && env->me_oom_func) { + /* LY: notify end of oom-loop */ + env->me_oom_func(env, 0, 0, oldest, 0, -retry); + } #else + (void) oldest; (void) mdb_reader_check(env, NULL); #endif /* MDBX_MODE_ENABLED */ return mdb_find_oldest(env, NULL); From 8791e3243fb83740017a71ea8d745ea6b6a9e8a1 Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Sat, 7 May 2016 15:21:59 +0300 Subject: [PATCH 3/5] mdbx: workaround for pthread_setspecific's memleak. 
Change-Id: I8b48548a40bb2fe58db14b8f6944f5a3802ffbb9 --- mdb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mdb.c b/mdb.c index 6744ac76..97061ddf 100644 --- a/mdb.c +++ b/mdb.c @@ -5019,6 +5019,7 @@ mdb_env_close0(MDB_env *env) if (!(env->me_flags & MDB_ENV_ACTIVE)) return; + env->me_flags &= ~MDB_ENV_ACTIVE; /* Doing this here since me_dbxs may not exist during mdb_env_close */ if (env->me_dbxs) { @@ -5038,7 +5039,12 @@ mdb_env_close0(MDB_env *env) mdb_midl_free(env->me_free_pgs); if (env->me_flags & MDB_ENV_TXKEY) { + struct MDB_rthc *rthc = pthread_getspecific(env->me_txkey); + if (rthc && pthread_setspecific(env->me_txkey, NULL) == 0) { + mdb_env_reader_destr(rthc); + } pthread_key_delete(env->me_txkey); + env->me_flags &= ~MDB_ENV_TXKEY; } if (env->me_map) { @@ -5083,8 +5089,6 @@ mdb_env_close0(MDB_env *env) if (env->me_lfd != INVALID_HANDLE_VALUE) { (void) close(env->me_lfd); } - - env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY); } #if ! MDBX_MODE_ENABLED From 4d3e349c16760807ad1681f6af014ab9856289ea Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Sat, 7 May 2016 23:42:50 +0300 Subject: [PATCH 4/5] mdbx: don't memcpy when src eq dest. Workaround for 'destination overlaps source' from Valgrind. 
Change-Id: Ib12eacbd224057334311816346ea0ccece2b42cf --- mdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mdb.c b/mdb.c index 97061ddf..c8f3a0fe 100644 --- a/mdb.c +++ b/mdb.c @@ -7383,14 +7383,14 @@ update: memcpy(ndata, data->mv_data, sizeof(pgno_t)); else if (F_ISSET(flags, MDB_RESERVE)) data->mv_data = ndata; - else + else if (ndata != data->mv_data) memcpy(ndata, data->mv_data, data->mv_size); } else { memcpy(ndata, &ofp->mp_pgno, sizeof(pgno_t)); ndata = PAGEDATA(ofp); if (F_ISSET(flags, MDB_RESERVE)) data->mv_data = ndata; - else + else if (ndata != data->mv_data) memcpy(ndata, data->mv_data, data->mv_size); } } From 7c3c28239c12775f81e528f35e19e06960b1077b Mon Sep 17 00:00:00 2001 From: Leo Yuriev Date: Sun, 8 May 2016 02:29:59 +0300 Subject: [PATCH 5/5] mdbx: more likely/unlikely for mdb_node_add. Change-Id: I1f241c6cea55db6fed744ba9f6f5733f6622015f --- mdb.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mdb.c b/mdb.c index c8f3a0fe..b2fe20cb 100644 --- a/mdb.c +++ b/mdb.c @@ -7327,10 +7327,10 @@ mdb_node_add(MDB_cursor *mc, indx_t indx, node_size += key->mv_size; if (IS_LEAF(mp)) { mdb_cassert(mc, key && data); - if (F_ISSET(flags, F_BIGDATA)) { + if (unlikely(F_ISSET(flags, F_BIGDATA))) { /* Data already on overflow page. */ node_size += sizeof(pgno_t); - } else if (node_size + data->mv_size > mc->mc_txn->mt_env->me_nodemax) { + } else if (unlikely(node_size + data->mv_size > mc->mc_txn->mt_env->me_nodemax)) { int ovpages = OVPAGES(data->mv_size, mc->mc_txn->mt_env->me_psize); int rc; /* Put data on overflow page. 
*/ @@ -7378,19 +7378,19 @@ update: if (IS_LEAF(mp)) { ndata = NODEDATA(node); - if (ofp == NULL) { - if (F_ISSET(flags, F_BIGDATA)) + if (unlikely(ofp == NULL)) { + if (unlikely(F_ISSET(flags, F_BIGDATA))) memcpy(ndata, data->mv_data, sizeof(pgno_t)); else if (F_ISSET(flags, MDB_RESERVE)) data->mv_data = ndata; - else if (ndata != data->mv_data) + else if (likely(ndata != data->mv_data)) memcpy(ndata, data->mv_data, data->mv_size); } else { memcpy(ndata, &ofp->mp_pgno, sizeof(pgno_t)); ndata = PAGEDATA(ofp); if (F_ISSET(flags, MDB_RESERVE)) data->mv_data = ndata; - else if (ndata != data->mv_data) + else if (likely(ndata != data->mv_data)) memcpy(ndata, data->mv_data, data->mv_size); } }