mdbx: Merge branch 'master' into nexenta.

This commit is contained in:
Leo Yuriev 2016-04-28 08:21:43 +03:00
commit 0a65b26c58
18 changed files with 563 additions and 482 deletions

View File

@ -18,8 +18,8 @@ prefix ?= /usr/local
mandir ?= $(prefix)/man mandir ?= $(prefix)/man
CC ?= gcc CC ?= gcc
XCFLAGS ?= XCFLAGS ?= -DNDEBUG=1 -DMDB_DEBUG=0
CFLAGS ?= -O2 -ggdb3 -Wall -Werror -DNDEBUG=1 CFLAGS ?= -O2 -g3 -Wall -Werror -Wextra
CFLAGS += -pthread $(XCFLAGS) CFLAGS += -pthread $(XCFLAGS)
IOARENA ?= ../ioarena.git/@BUILD/src/ioarena IOARENA ?= ../ioarena.git/@BUILD/src/ioarena
@ -76,7 +76,7 @@ libmdbx.a: mdbx.o
$(AR) rs $@ $^ $(AR) rs $@ $^
libmdbx.so: mdbx.lo libmdbx.so: mdbx.lo
$(CC) $(CFLAGS) $(LDFLAGS) -pthread -shared -o $@ $^ $(CC) $(CFLAGS) $(LDFLAGS) -save-temps -pthread -shared -o $@ $^
liblmdb.a: lmdb.o liblmdb.a: lmdb.o
$(AR) rs $@ $^ $(AR) rs $@ $^
@ -168,23 +168,28 @@ ifneq ($(wildcard $(IOARENA)),)
.PHONY: bench clean-bench re-bench .PHONY: bench clean-bench re-bench
bench: bench-lmdb.txt bench-mdbx.txt
clean-bench: clean-bench:
rm -rf bench-*.txt _ioarena rm -rf bench-*.txt _ioarena/*
re-bench: clean-bench bench re-bench: clean-bench bench
NN := 25000000
define bench-rule
bench-$(1).txt: $(3) $(IOARENA) Makefile
$(IOARENA) -D $(1) -B crud -m nosync -n $(2) | tee $$@ | grep throughput \
&& $(IOARENA) -D $(1) -B get,iterate -m sync -r 4 -n $(2) | tee -a $$@ | grep throughput \
|| rm -f $$@
endef
$(eval $(call bench-rule,mdbx,$(NN),libmdbx.so))
$(eval $(call bench-rule,lmdb,$(NN)))
$(eval $(call bench-rule,dummy,$(NN)))
$(eval $(call bench-rule,debug,10))
bench: bench-lmdb.txt bench-mdbx.txt bench: bench-lmdb.txt bench-mdbx.txt
bench-mdbx.txt: libmdbx.so $(IOARENA)
$(IOARENA) -D mdbx -B crud -m nosync -n 10000000 | tee $@ \
&& $(IOARENA) -D mdbx -B get,iterate -m sync -r 4 -n 10000000 | tee -a $@ \
|| rm -f $@
bench-lmdb.txt: $(IOARENA)
$(IOARENA) -D lmdb -B crud -m nosync -n 10000000 | tee $@ \
&& $(IOARENA) -D lmdb -B get,iterate -m sync -r 4 -n 10000000 | tee -a $@ \
|| rm -f $@
endif endif

252
mdb.c
View File

@ -1902,7 +1902,7 @@ mdb_meta_head_r(MDB_env *env) {
} else if (likely(b->mm_txnid == head_txnid)) { } else if (likely(b->mm_txnid == head_txnid)) {
h = b; h = b;
} else { } else {
/* LY: seems got a race with mdb_env_sync0() */ /* LY: seems got a collision with mdb_env_sync0() */
mdb_coherent_barrier(); mdb_coherent_barrier();
head_txnid = env->me_txns->mti_txnid; head_txnid = env->me_txns->mti_txnid;
mdb_assert(env, a->mm_txnid != b->mm_txnid || head_txnid == 0); mdb_assert(env, a->mm_txnid != b->mm_txnid || head_txnid == 0);
@ -2114,7 +2114,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp, int flags)
goto fail; goto fail;
} }
for (;;) { /* oomkick retry loop */ for (;;) { /* oom-kick retry loop */
found_old = 0; found_old = 0;
for (op = MDB_FIRST;; op = (flags & MDB_LIFORECLAIM) ? MDB_PREV : MDB_NEXT) { for (op = MDB_FIRST;; op = (flags & MDB_LIFORECLAIM) ? MDB_PREV : MDB_NEXT) {
MDB_val key, data; MDB_val key, data;
@ -2321,6 +2321,12 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp, int flags)
* utterly no-sync write mode was requested. In such case * utterly no-sync write mode was requested. In such case
* don't make a steady-sync, but only a legacy-mode checkpoint, * don't make a steady-sync, but only a legacy-mode checkpoint,
* just for resume reclaiming only, not for data consistency. */ * just for resume reclaiming only, not for data consistency. */
mdb_debug("kick-gc: head %zu/%c, tail %zu/%c, oldest %zu, txnid %zu",
head->mm_txnid, META_IS_WEAK(head) ? 'W' : 'N',
tail->mm_txnid, META_IS_WEAK(tail) ? 'W' : 'N',
oldest, env->me_txns->mt1.mtb.mtb_txnid );
int flags = env->me_flags & MDB_WRITEMAP; int flags = env->me_flags & MDB_WRITEMAP;
if ((env->me_flags & MDB_UTTERLY_NOSYNC) == MDB_UTTERLY_NOSYNC) if ((env->me_flags & MDB_UTTERLY_NOSYNC) == MDB_UTTERLY_NOSYNC)
flags |= MDB_UTTERLY_NOSYNC; flags |= MDB_UTTERLY_NOSYNC;
@ -3273,7 +3279,7 @@ static MDB_INLINE int
mdb_backlog_size(MDB_txn *txn) mdb_backlog_size(MDB_txn *txn)
{ {
int reclaimed = txn->mt_env->me_pghead ? txn->mt_env->me_pghead[0] : 0; int reclaimed = txn->mt_env->me_pghead ? txn->mt_env->me_pghead[0] : 0;
return reclaimed += txn->mt_loose_count; return reclaimed + txn->mt_loose_count;
} }
/* LY: Prepare a backlog of pages to modify FreeDB itself, /* LY: Prepare a backlog of pages to modify FreeDB itself,
@ -3282,30 +3288,20 @@ mdb_backlog_size(MDB_txn *txn)
static int static int
mdb_prep_backlog(MDB_txn *txn, MDB_cursor *mc) mdb_prep_backlog(MDB_txn *txn, MDB_cursor *mc)
{ {
/* LY: Critical level (1) for copy a one leaf-page. /* LY: extra page(s) for b-tree rebalancing */
* But also (+2) for split leaf-page into a couple with creation const int extra = (txn->mt_env->me_flags & MDB_LIFORECLAIM) ? 2 : 1;
* one branch-page (for ability of insertion and my paranoia). */
int minimal_level = 3;
/* LY: Safe level for update branch-pages from root */ if (mdb_backlog_size(txn) < mc->mc_db->md_depth + extra) {
int safe_level = minimal_level + 8;
if (mdb_backlog_size(txn) < safe_level) {
/* Make sure "hot" pages of freeDB is touched and on freelist */
int rc = mdb_cursor_touch(mc); int rc = mdb_cursor_touch(mc);
if (unlikely(rc)) if (unlikely(rc))
return rc; return rc;
while (mdb_backlog_size(txn) < minimal_level) { while (unlikely(mdb_backlog_size(txn) < extra)) {
MDB_page *mp = NULL; rc = mdb_page_alloc(mc, 1, NULL, MDB_ALLOC_GC);
rc = mdb_page_alloc(mc, 1, &mp, MDB_ALLOC_GC | MDB_ALLOC_NEW); if (unlikely(rc)) {
if (unlikely(rc)) if (unlikely(rc != MDB_NOTFOUND))
return rc; return rc;
if (mp) { break;
NEXT_LOOSE_PAGE(mp) = txn->mt_loose_pgs;
txn->mt_loose_pgs = mp;
txn->mt_loose_count++;
mp->mp_flags |= P_LOOSE;
} }
} }
} }
@ -3462,7 +3458,7 @@ again:
/* LY: need more just a txn-id for save page list. */ /* LY: need more just a txn-id for save page list. */
rc = mdb_page_alloc(&mc, 0, NULL, MDB_ALLOC_GC); rc = mdb_page_alloc(&mc, 0, NULL, MDB_ALLOC_GC);
if (likely(rc == 0)) if (likely(rc == 0))
/* LY: ок, reclaimed from freedb. */ /* LY: ok, reclaimed from freedb. */
continue; continue;
if (unlikely(rc != MDB_NOTFOUND)) if (unlikely(rc != MDB_NOTFOUND))
/* LY: other troubles... */ /* LY: other troubles... */
@ -4134,17 +4130,17 @@ mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending)
int rc; int rc;
MDB_meta* head = mdb_meta_head_w(env); MDB_meta* head = mdb_meta_head_w(env);
size_t prev_mapsize = head->mm_mapsize; size_t prev_mapsize = head->mm_mapsize;
MDB_meta* tail = META_IS_WEAK(head) ? head : mdb_env_meta_flipflop(env, head); volatile MDB_meta* target = META_IS_WEAK(head) ? head : mdb_env_meta_flipflop(env, head);
off_t offset = (char*) tail - env->me_map; off_t offset = (char*) target - env->me_map;
size_t used_size = env->me_psize * (pending->mm_last_pg + 1); size_t used_size = env->me_psize * (pending->mm_last_pg + 1);
mdb_assert(env, (env->me_flags & (MDB_RDONLY | MDB_FATAL_ERROR)) == 0); mdb_assert(env, (env->me_flags & (MDB_RDONLY | MDB_FATAL_ERROR)) == 0);
mdb_assert(env, META_IS_WEAK(head) || env->me_sync_pending != 0 mdb_assert(env, META_IS_WEAK(head) || env->me_sync_pending != 0
|| env->me_mapsize != prev_mapsize); || env->me_mapsize != prev_mapsize);
mdb_assert(env, pending->mm_txnid > head->mm_txnid || META_IS_WEAK(head)); mdb_assert(env, pending->mm_txnid > head->mm_txnid || META_IS_WEAK(head));
mdb_assert(env, pending->mm_txnid > tail->mm_txnid || META_IS_WEAK(tail)); mdb_assert(env, pending->mm_txnid > target->mm_txnid || META_IS_WEAK(target));
MDB_meta* stay = mdb_env_meta_flipflop(env, tail); MDB_meta* stay = mdb_env_meta_flipflop(env, (MDB_meta*) target);
mdb_assert(env, pending->mm_txnid > stay->mm_txnid); mdb_assert(env, pending->mm_txnid > stay->mm_txnid);
pending->mm_mapsize = env->me_mapsize; pending->mm_mapsize = env->me_mapsize;
@ -4172,7 +4168,7 @@ mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending)
if ((flags & MDB_MAPASYNC) == 0) if ((flags & MDB_MAPASYNC) == 0)
env->me_sync_pending = 0; env->me_sync_pending = 0;
} else { } else {
int (*sync_fd)(int fd) = fdatasync; int (*flush)(int fd) = fdatasync;
if (unlikely(prev_mapsize != pending->mm_mapsize)) { if (unlikely(prev_mapsize != pending->mm_mapsize)) {
/* LY: It is no reason to use fdatasync() here, even in case /* LY: It is no reason to use fdatasync() here, even in case
* no such bug in a kernel. Because "no-bug" mean that a kernel * no such bug in a kernel. Because "no-bug" mean that a kernel
@ -4184,9 +4180,9 @@ mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending)
* *
* For more info about of a corresponding fdatasync() bug * For more info about of a corresponding fdatasync() bug
* see http://www.spinics.net/lists/linux-ext4/msg33714.html */ * see http://www.spinics.net/lists/linux-ext4/msg33714.html */
sync_fd = fsync; flush = fsync;
} }
while(unlikely(sync_fd(env->me_fd) < 0)) { while(unlikely(flush(env->me_fd) < 0)) {
rc = errno; rc = errno;
if (rc != EINTR) if (rc != EINTR)
goto undo; goto undo;
@ -4203,23 +4199,27 @@ mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending)
(flags & MDB_UTTERLY_NOSYNC) == MDB_UTTERLY_NOSYNC (flags & MDB_UTTERLY_NOSYNC) == MDB_UTTERLY_NOSYNC
? MDB_DATASIGN_NONE : MDB_DATASIGN_WEAK; ? MDB_DATASIGN_NONE : MDB_DATASIGN_WEAK;
} }
mdb_debug("writing meta page %d for root page %zu", mdb_debug("writing meta %d, root %zu, txn_id %zu, %s",
offset >= env->me_psize, pending->mm_dbs[MAIN_DBI].md_root); offset >= env->me_psize, pending->mm_dbs[MAIN_DBI].md_root,
pending->mm_txnid,
META_IS_WEAK(pending) ? "Weak" : META_IS_STEADY(pending) ? "Steady" : "Legacy" );
if (env->me_flags & MDB_WRITEMAP) { if (env->me_flags & MDB_WRITEMAP) {
#ifdef __SANITIZE_THREAD__ #ifdef __SANITIZE_THREAD__
pthread_mutex_lock(&tsan_mutex); pthread_mutex_lock(&tsan_mutex);
#endif #endif
tail->mm_datasync_sign = MDB_DATASIGN_WEAK; /* LY: 'invalidate' the meta,
tail->mm_txnid = 0; * but mdb_meta_head_r() will be confused/retired in collision case. */
mdb_coherent_barrier(); target->mm_datasync_sign = MDB_DATASIGN_WEAK;
tail->mm_mapsize = pending->mm_mapsize; target->mm_txnid = 0;
tail->mm_dbs[FREE_DBI] = pending->mm_dbs[FREE_DBI]; /* LY: update info */
tail->mm_dbs[MAIN_DBI] = pending->mm_dbs[MAIN_DBI]; target->mm_mapsize = pending->mm_mapsize;
tail->mm_last_pg = pending->mm_last_pg; target->mm_dbs[FREE_DBI] = pending->mm_dbs[FREE_DBI];
/* (LY) ITS#7969: issue a memory barrier, it is noop for x86. */ target->mm_dbs[MAIN_DBI] = pending->mm_dbs[MAIN_DBI];
mdb_coherent_barrier(); target->mm_last_pg = pending->mm_last_pg;
tail->mm_txnid = pending->mm_txnid; /* LY: 'commit' the meta */
tail->mm_datasync_sign = pending->mm_datasync_sign; target->mm_txnid = pending->mm_txnid;
target->mm_datasync_sign = pending->mm_datasync_sign;
} else { } else {
pending->mm_magic = MDB_MAGIC; pending->mm_magic = MDB_MAGIC;
pending->mm_version = MDB_DATA_VERSION; pending->mm_version = MDB_DATA_VERSION;
@ -4235,7 +4235,7 @@ mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending)
mdb_debug("write failed, disk error?"); mdb_debug("write failed, disk error?");
/* On a failure, the pagecache still contains the new data. /* On a failure, the pagecache still contains the new data.
* Write some old data back, to prevent it from being used. */ * Write some old data back, to prevent it from being used. */
if (pwrite(env->me_fd, tail, sizeof(MDB_meta), offset) == sizeof(MDB_meta)) { if (pwrite(env->me_fd, (void*) target, sizeof(MDB_meta), offset) == sizeof(MDB_meta)) {
/* LY: take a chance, if write succeeds at a magic ;) */ /* LY: take a chance, if write succeeds at a magic ;) */
goto retry; goto retry;
} }
@ -4319,7 +4319,7 @@ mdb_env_create(MDB_env **env)
} }
static int __cold static int __cold
mdb_env_map(MDB_env *env, void *addr) mdb_env_map(MDB_env *env, void *addr, size_t usedsize)
{ {
unsigned flags = env->me_flags; unsigned flags = env->me_flags;
@ -4336,16 +4336,17 @@ mdb_env_map(MDB_env *env, void *addr)
return errno; return errno;
} }
if (flags & MDB_NORDAHEAD) { /* Can happen because the address argument to mmap() is just a
/* Turn off readahead. It's harmful when the DB is larger than RAM. */ * hint. mmap() can pick another, e.g. if the range is in use.
if (madvise(env->me_map, env->me_mapsize, MADV_RANDOM) < 0) * The MAP_FIXED flag would prevent that, but then mmap could
return errno; * instead unmap existing pages to make room for the new map.
*/
if (addr && env->me_map != addr) {
errno = 0; /* LY: clean errno as a hit for this case */
return EBUSY; /* TODO: Make a new MDB_* error code? */
} }
if (madvise(env->me_map, env->me_mapsize, MADV_DONTFORK) < 0) if (madvise(env->me_map, env->me_mapsize, MADV_DONTFORK))
return errno;
if (madvise(env->me_map, env->me_mapsize, MADV_WILLNEED) < 0)
return errno; return errno;
#ifdef MADV_NOHUGEPAGE #ifdef MADV_NOHUGEPAGE
@ -4358,15 +4359,16 @@ mdb_env_map(MDB_env *env, void *addr)
} }
#endif #endif
/* Can happen because the address argument to mmap() is just a #ifdef MADV_REMOVE
* hint. mmap() can pick another, e.g. if the range is in use. if (flags & MDB_WRITEMAP) {
* The MAP_FIXED flag would prevent that, but then mmap could assert(used_edge < env->me_mapsize);
* instead unmap existing pages to make room for the new map. (void) madvise(env->me_map + usedsize, env->me_mapsize - usedsize, MADV_REMOVE);
*/
if (addr && env->me_map != addr) {
errno = 0; /* LY: clean errno as a hit for this case */
return EBUSY; /* TODO: Make a new MDB_* error code? */
} }
#endif
/* Turn on/off readahead. It's harmful when the DB is larger than RAM. */
if (madvise(env->me_map, env->me_mapsize, (flags & MDB_NORDAHEAD) ? MADV_RANDOM : MADV_WILLNEED))
return errno;
/* Lock meta pages to avoid unexpected write, /* Lock meta pages to avoid unexpected write,
* before the data pages would be synchronized. */ * before the data pages would be synchronized. */
@ -4374,8 +4376,8 @@ mdb_env_map(MDB_env *env, void *addr)
return errno; return errno;
#ifdef USE_VALGRIND #ifdef USE_VALGRIND
env->me_valgrind_handle = VALGRIND_CREATE_BLOCK( env->me_valgrind_handle =
env->me_map, env->me_mapsize, "lmdb"); VALGRIND_CREATE_BLOCK(env->me_map, env->me_mapsize, "lmdb");
#endif #endif
return MDB_SUCCESS; return MDB_SUCCESS;
@ -4405,12 +4407,10 @@ mdb_env_set_mapsize(MDB_env *env, size_t size)
meta = mdb_meta_head_w(env); meta = mdb_meta_head_w(env);
if (!size) if (!size)
size = meta->mm_mapsize; size = meta->mm_mapsize;
{ /* Silently round up to minimum if the size is too small */
/* Silently round up to minimum if the size is too small */ const size_t usedsize = (meta->mm_last_pg + 1) * env->me_psize;
size_t minsize = (meta->mm_last_pg + 1) * env->me_psize; if (size < usedsize)
if (size < minsize) size = usedsize;
size = minsize;
}
munmap(env->me_map, env->me_mapsize); munmap(env->me_map, env->me_mapsize);
#ifdef USE_VALGRIND #ifdef USE_VALGRIND
VALGRIND_DISCARD(env->me_valgrind_handle); VALGRIND_DISCARD(env->me_valgrind_handle);
@ -4418,7 +4418,7 @@ mdb_env_set_mapsize(MDB_env *env, size_t size)
#endif #endif
env->me_mapsize = size; env->me_mapsize = size;
old = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : NULL; old = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : NULL;
rc = mdb_env_map(env, old); rc = mdb_env_map(env, old, usedsize);
if (rc) if (rc)
return rc; return rc;
} }
@ -4536,7 +4536,8 @@ mdb_env_open2(MDB_env *env, MDB_meta *meta)
newenv = 0; newenv = 0;
} }
rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta->mm_address : NULL); const size_t usedsize = (meta->mm_last_pg + 1) * env->me_psize;
rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta->mm_address : NULL, usedsize);
if (rc) if (rc)
return rc; return rc;
@ -4869,6 +4870,13 @@ mdbx_env_open_ex(MDB_env *env, const char *path, unsigned flags, mode_t mode, in
if (unlikely(env->me_signature != MDBX_ME_SIGNATURE)) if (unlikely(env->me_signature != MDBX_ME_SIGNATURE))
return MDB_VERSION_MISMATCH; return MDB_VERSION_MISMATCH;
#if MDB_LIFORECLAIM
/* LY: don't allow LIFO with just NOMETASYNC */
if ((flags & (MDB_NOMETASYNC | MDB_LIFORECLAIM | MDB_NOSYNC))
== (MDB_NOMETASYNC | MDB_LIFORECLAIM))
return EINVAL;
#endif /* MDB_LIFORECLAIM */
if (env->me_fd != INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS))) if (env->me_fd != INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS)))
return EINVAL; return EINVAL;
@ -5129,7 +5137,11 @@ mdb_env_close(MDB_env *env)
* | 1, a > b * | 1, a > b
* \ * \
*/ */
#define mdbx_cmp2int(a, b) (((a) > (b)) - ((b) > (a))) #if 1
# define mdbx_cmp2int(a, b) (((b) > (a)) ? -1 : (a) > (b))
#else
# define mdbx_cmp2int(a, b) (((a) > (b)) - ((b) > (a)))
#endif
/** Compare two items pointing at aligned unsigned int's. */ /** Compare two items pointing at aligned unsigned int's. */
static int __hot static int __hot
@ -5225,9 +5237,22 @@ mdb_cmp_int_ua(const MDB_val *a, const MDB_val *b)
static int __hot static int __hot
mdb_cmp_memn(const MDB_val *a, const MDB_val *b) mdb_cmp_memn(const MDB_val *a, const MDB_val *b)
{ {
size_t minlen = (a->mv_size < b->mv_size) ? a->mv_size : b->mv_size; /* LY: assumes that length of keys are NOT equal for most cases,
int diff = memcmp(a->mv_data, b->mv_data, minlen); * if no then branch-prediction should mitigate the problem */
return likely(diff) ? diff : mdbx_cmp2int(a->mv_size, b->mv_size); #if 0
/* LY: without branch instructions on x86,
* but isn't best for equal length of keys */
int diff_len = mdbx_cmp2int(a->mv_size, b->mv_size);
#else
/* LY: best when length of keys are equal,
* but got a branch-penalty otherwise */
if (unlikely(a->mv_size == b->mv_size))
return memcmp(a->mv_data, b->mv_data, a->mv_size);
int diff_len = (a->mv_size < b->mv_size) ? -1 : 1;
#endif
size_t shortest = (a->mv_size < b->mv_size) ? a->mv_size : b->mv_size;
int diff_data = memcmp(a->mv_data, b->mv_data, shortest);
return likely(diff_data) ? diff_data : diff_len;
} }
/** Compare two items in reverse byte order */ /** Compare two items in reverse byte order */
@ -5834,11 +5859,12 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
MDB_node *leaf; MDB_node *leaf;
int rc; int rc;
if (unlikely(mc->mc_flags & C_EOF)) { if ((mc->mc_flags & C_EOF) ||
((mc->mc_flags & C_DEL) && op == MDB_NEXT_DUP)) {
return MDB_NOTFOUND; return MDB_NOTFOUND;
} }
if (!(mc->mc_flags & C_INITIALIZED))
mdb_cassert(mc, mc->mc_flags & C_INITIALIZED); return mdb_cursor_first(mc, key, data);
mp = mc->mc_pg[mc->mc_top]; mp = mc->mc_pg[mc->mc_top];
@ -5917,7 +5943,12 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
MDB_node *leaf; MDB_node *leaf;
int rc; int rc;
mdb_cassert(mc, mc->mc_flags & C_INITIALIZED); if (!(mc->mc_flags & C_INITIALIZED)) {
rc = mdb_cursor_last(mc, key, data);
if (unlikely(rc))
return rc;
mc->mc_ki[mc->mc_top]++;
}
mp = mc->mc_pg[mc->mc_top]; mp = mc->mc_pg[mc->mc_top];
@ -6367,10 +6398,7 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data,
rc = MDB_INCOMPATIBLE; rc = MDB_INCOMPATIBLE;
break; break;
} }
if (!(mc->mc_flags & C_INITIALIZED)) rc = mdb_cursor_next(mc, key, data, MDB_NEXT_DUP);
rc = mdb_cursor_first(mc, key, data);
else
rc = mdb_cursor_next(mc, key, data, MDB_NEXT_DUP);
if (rc == MDB_SUCCESS) { if (rc == MDB_SUCCESS) {
if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) {
MDB_cursor *mx; MDB_cursor *mx;
@ -6412,21 +6440,11 @@ fetchm:
case MDB_NEXT: case MDB_NEXT:
case MDB_NEXT_DUP: case MDB_NEXT_DUP:
case MDB_NEXT_NODUP: case MDB_NEXT_NODUP:
if (!(mc->mc_flags & C_INITIALIZED)) rc = mdb_cursor_next(mc, key, data, op);
rc = mdb_cursor_first(mc, key, data);
else
rc = mdb_cursor_next(mc, key, data, op);
break; break;
case MDB_PREV: case MDB_PREV:
case MDB_PREV_DUP: case MDB_PREV_DUP:
case MDB_PREV_NODUP: case MDB_PREV_NODUP:
if (!(mc->mc_flags & C_INITIALIZED)) {
rc = mdb_cursor_last(mc, key, data);
if (unlikely(rc))
break;
mc->mc_flags |= C_INITIALIZED;
mc->mc_ki[mc->mc_top]++;
}
rc = mdb_cursor_prev(mc, key, data, op); rc = mdb_cursor_prev(mc, key, data, op);
break; break;
case MDB_FIRST: case MDB_FIRST:
@ -8473,8 +8491,6 @@ mdb_cursor_del0(MDB_cursor *mc)
if (m3->mc_pg[mc->mc_top] == mp) { if (m3->mc_pg[mc->mc_top] == mp) {
if (m3->mc_ki[mc->mc_top] == ki) { if (m3->mc_ki[mc->mc_top] == ki) {
m3->mc_flags |= C_DEL; m3->mc_flags |= C_DEL;
if (mc->mc_db->md_flags & MDB_DUPSORT)
m3->mc_xcursor->mx_cursor.mc_flags &= ~C_INITIALIZED;
} else if (m3->mc_ki[mc->mc_top] > ki) { } else if (m3->mc_ki[mc->mc_top] > ki) {
m3->mc_ki[mc->mc_top]--; m3->mc_ki[mc->mc_top]--;
} }
@ -8508,11 +8524,21 @@ mdb_cursor_del0(MDB_cursor *mc)
continue; continue;
if (m3->mc_pg[mc->mc_top] == mp) { if (m3->mc_pg[mc->mc_top] == mp) {
/* if m3 points past last node in page, find next sibling */ /* if m3 points past last node in page, find next sibling */
if (m3->mc_ki[mc->mc_top] >= nkeys) { if (m3->mc_ki[mc->mc_top] >= mc->mc_ki[mc->mc_top]) {
rc = mdb_cursor_sibling(m3, 1); if (m3->mc_ki[mc->mc_top] >= nkeys) {
if (rc == MDB_NOTFOUND) { rc = mdb_cursor_sibling(m3, 1);
m3->mc_flags |= C_EOF; if (rc == MDB_NOTFOUND) {
rc = MDB_SUCCESS; m3->mc_flags |= C_EOF;
rc = MDB_SUCCESS;
continue;
}
}
if (mc->mc_db->md_flags & MDB_DUPSORT) {
MDB_node *node = NODEPTR(m3->mc_pg[m3->mc_top], m3->mc_ki[m3->mc_top]);
if (node->mn_flags & F_DUPDATA) {
mdb_xcursor_init1(m3, node);
m3->mc_xcursor->mx_cursor.mc_flags |= C_DEL;
}
} }
} }
} }
@ -9573,14 +9599,30 @@ mdb_env_copy(MDB_env *env, const char *path)
} }
int __cold int __cold
mdb_env_set_flags(MDB_env *env, unsigned flag, int onoff) mdb_env_set_flags(MDB_env *env, unsigned flags, int onoff)
{ {
if (unlikely(flag & ~CHANGEABLE)) if (unlikely(flags & ~CHANGEABLE))
return EINVAL; return EINVAL;
pthread_mutex_t *mutex = MDB_MUTEX(env, w);
int rc = mdb_mutex_lock(env, mutex);
if (unlikely(rc))
return rc;
if (onoff) if (onoff)
env->me_flags |= flag; flags = env->me_flags | flags;
else else
env->me_flags &= ~flag; flags = env->me_flags & ~flags;
#if MDB_LIFORECLAIM
/* LY: don't allow LIFO with just NOMETASYNC */
if ((flags & (MDB_NOMETASYNC | MDB_LIFORECLAIM | MDB_NOSYNC))
== (MDB_NOMETASYNC | MDB_LIFORECLAIM))
return EINVAL;
#endif /* MDB_LIFORECLAIM */
env->me_flags = flags;
mdb_mutex_unlock(env, mutex);
return MDB_SUCCESS; return MDB_SUCCESS;
} }

View File

@ -50,6 +50,7 @@ flagbit dbflags[] = {
static volatile sig_atomic_t gotsignal; static volatile sig_atomic_t gotsignal;
static void signal_hanlder( int sig ) { static void signal_hanlder( int sig ) {
(void) sig;
gotsignal = 1; gotsignal = 1;
} }
@ -228,6 +229,8 @@ static size_t problems_pop(struct problem* list) {
static int pgvisitor(size_t pgno, unsigned pgnumber, void* ctx, const char* dbi, static int pgvisitor(size_t pgno, unsigned pgnumber, void* ctx, const char* dbi,
const char* type, int nentries, int payload_bytes, int header_bytes, int unused_bytes) const char* type, int nentries, int payload_bytes, int header_bytes, int unused_bytes)
{ {
(void) ctx;
if (type) { if (type) {
size_t page_bytes = payload_bytes + header_bytes + unused_bytes; size_t page_bytes = payload_bytes + header_bytes + unused_bytes;
size_t page_size = pgnumber * stat.base.ms_psize; size_t page_size = pgnumber * stat.base.ms_psize;
@ -250,14 +253,14 @@ static int pgvisitor(size_t pgno, unsigned pgnumber, void* ctx, const char* dbi,
problem_add("page", pgno, "illegal unused-bytes", "%zu < %i < %zu", problem_add("page", pgno, "illegal unused-bytes", "%zu < %i < %zu",
0, unused_bytes, stat.base.ms_psize); 0, unused_bytes, stat.base.ms_psize);
if (header_bytes < sizeof(long) || header_bytes >= stat.base.ms_psize - sizeof(long)) if (header_bytes < (int) sizeof(long) || (size_t) header_bytes >= stat.base.ms_psize - sizeof(long))
problem_add("page", pgno, "illegal header-length", "%zu < %i < %zu", problem_add("page", pgno, "illegal header-length", "%zu < %i < %zu",
sizeof(long), header_bytes, stat.base.ms_psize - sizeof(long)); sizeof(long), header_bytes, stat.base.ms_psize - sizeof(long));
if (payload_bytes < 1) { if (payload_bytes < 1) {
if (nentries > 0) { if (nentries > 0) {
problem_add("page", pgno, "zero size-of-entry", "payload %i bytes, %i entries", problem_add("page", pgno, "zero size-of-entry", "payload %i bytes, %i entries",
payload_bytes, nentries); payload_bytes, nentries);
if (header_bytes + unused_bytes < page_size) { if ((size_t) header_bytes + unused_bytes < page_size) {
/* LY: hush a misuse error */ /* LY: hush a misuse error */
page_bytes = page_size; page_bytes = page_size;
} }
@ -302,6 +305,9 @@ typedef int (visitor)(size_t record_number, MDB_val *key, MDB_val* data);
static int process_db(MDB_dbi dbi, char *name, visitor *handler, int silent); static int process_db(MDB_dbi dbi, char *name, visitor *handler, int silent);
static int handle_userdb(size_t record_number, MDB_val *key, MDB_val* data) { static int handle_userdb(size_t record_number, MDB_val *key, MDB_val* data) {
(void) record_number;
(void) key;
(void) data;
return MDB_SUCCESS; return MDB_SUCCESS;
} }
@ -366,7 +372,8 @@ static int handle_freedb(size_t record_number, MDB_val *key, MDB_val* data) {
static int handle_maindb(size_t record_number, MDB_val *key, MDB_val* data) { static int handle_maindb(size_t record_number, MDB_val *key, MDB_val* data) {
char *name; char *name;
int i, rc; int rc;
size_t i;
name = key->mv_data; name = key->mv_data;
for(i = 0; i < key->mv_size; ++i) { for(i = 0; i < key->mv_size; ++i) {
@ -880,7 +887,7 @@ int main(int argc, char *argv[])
print(", gc %zu (%.1f%%)", freedb_pages, freedb_pages / percent); print(", gc %zu (%.1f%%)", freedb_pages, freedb_pages / percent);
value = freedb_pages - reclaimable_pages; value = freedb_pages - reclaimable_pages;
print(", reading %zu (%.1f%%)", value, value / percent); print(", detained %zu (%.1f%%)", value, value / percent);
print(", reclaimable %zu (%.1f%%)", reclaimable_pages, reclaimable_pages / percent); print(", reclaimable %zu (%.1f%%)", reclaimable_pages, reclaimable_pages / percent);
} }

View File

@ -25,6 +25,7 @@
static void static void
sighandle(int sig) sighandle(int sig)
{ {
(void) sig;
} }
int main(int argc,char * argv[]) int main(int argc,char * argv[])

View File

@ -48,7 +48,8 @@ static volatile sig_atomic_t gotsig;
static void dumpsig( int sig ) static void dumpsig( int sig )
{ {
gotsig=1; (void) sig;
gotsig = 1;
} }
static const char hexc[] = "0123456789abcdef"; static const char hexc[] = "0123456789abcdef";

View File

@ -224,7 +224,7 @@ int main(int argc, char *argv[])
printf(" Free pages: %zu %.1f%%\n", value, value / percent); printf(" Free pages: %zu %.1f%%\n", value, value / percent);
value = pages - reclaimable; value = pages - reclaimable;
printf(" Reading: %zu %.1f%%\n", value, value / percent); printf(" Detained: %zu %.1f%%\n", value, value / percent);
value = reclaimable; value = reclaimable;
printf(" Reclaimable: %zu %.1f%%\n", value, value / percent); printf(" Reclaimable: %zu %.1f%%\n", value, value / percent);

5
midl.c
View File

@ -28,7 +28,6 @@
/** @defgroup idls ID List Management /** @defgroup idls ID List Management
* @{ * @{
*/ */
#define CMP(x,y) ( (x) < (y) ? -1 : (x) > (y) )
static unsigned __hot static unsigned __hot
mdb_midl_search( MDB_IDL ids, MDB_ID id ) mdb_midl_search( MDB_IDL ids, MDB_ID id )
@ -46,7 +45,7 @@ mdb_midl_search( MDB_IDL ids, MDB_ID id )
while( 0 < n ) { while( 0 < n ) {
unsigned pivot = n >> 1; unsigned pivot = n >> 1;
cursor = base + pivot + 1; cursor = base + pivot + 1;
val = CMP( ids[cursor], id ); val = mdbx_cmp2int( ids[cursor], id );
if( val < 0 ) { if( val < 0 ) {
n = pivot; n = pivot;
@ -295,7 +294,7 @@ mdb_mid2l_search( MDB_ID2L ids, MDB_ID id )
while( 0 < n ) { while( 0 < n ) {
unsigned pivot = n >> 1; unsigned pivot = n >> 1;
cursor = base + pivot + 1; cursor = base + pivot + 1;
val = CMP( id, ids[cursor].mid ); val = mdbx_cmp2int( id, ids[cursor].mid );
if( val < 0 ) { if( val < 0 ) {
n = pivot; n = pivot;

View File

@ -44,6 +44,8 @@ int main(int argc,char * argv[])
int env_oflags; int env_oflags;
struct stat db_stat, exe_stat; struct stat db_stat, exe_stat;
(void) argc;
(void) argv;
srand(time(NULL)); srand(time(NULL));
count = (rand()%384) + 64; count = (rand()%384) + 64;

View File

@ -52,6 +52,8 @@ int main(int argc,char * argv[])
int env_oflags; int env_oflags;
struct stat db_stat, exe_stat; struct stat db_stat, exe_stat;
(void) argc;
(void) argv;
srand(time(NULL)); srand(time(NULL));
count = (rand()%384) + 64; count = (rand()%384) + 64;
@ -118,22 +120,21 @@ int main(int argc,char * argv[])
mdb_txn_abort(txn); mdb_txn_abort(txn);
mdb_env_sync(env, 1); mdb_env_sync(env, 1);
j=0; int deleted = 0;
key.mv_data = sval; key.mv_data = sval;
for (i= count - 1; i > -1; i-= (rand()%5)) { for (i = count - 1; i > -1; i -= (rand()%5)) {
j++;
txn=NULL; txn=NULL;
E(mdb_txn_begin(env, NULL, 0, &txn)); E(mdb_txn_begin(env, NULL, 0, &txn));
sprintf(sval, "%03x ", values[i]); sprintf(sval, "%03x ", values[i]);
if (RES(MDB_NOTFOUND, mdb_del(txn, dbi, &key, NULL))) { if (RES(MDB_NOTFOUND, mdb_del(txn, dbi, &key, NULL))) {
j--;
mdb_txn_abort(txn); mdb_txn_abort(txn);
} else { } else {
E(mdb_txn_commit(txn)); E(mdb_txn_commit(txn));
deleted++;
} }
} }
free(values); free(values);
printf("Deleted %d values\n", j); printf("Deleted %d values\n", deleted);
printf("check-preset-b.cursor-next\n"); printf("check-preset-b.cursor-next\n");
E(mdb_env_stat(env, &mst)); E(mdb_env_stat(env, &mst));
@ -147,7 +148,7 @@ int main(int argc,char * argv[])
++present_b; ++present_b;
} }
CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get");
CHECK(present_b == present_a - j, "mismatch"); CHECK(present_b == present_a - deleted, "mismatch");
printf("check-preset-b.cursor-prev\n"); printf("check-preset-b.cursor-prev\n");
j = 1; j = 1;
@ -182,7 +183,8 @@ int main(int argc,char * argv[])
++present_c; ++present_c;
} }
CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get"); CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get");
CHECK(present_c == present_a, "mismatch"); printf("Rolled back %d deletion(s)\n", present_c - (present_a - deleted));
CHECK(present_c > present_a - deleted, "mismatch");
printf("check-preset-d.cursor-prev\n"); printf("check-preset-d.cursor-prev\n");
j = 1; j = 1;

View File

@ -46,6 +46,8 @@ int main(int argc,char * argv[])
int env_oflags; int env_oflags;
struct stat db_stat, exe_stat; struct stat db_stat, exe_stat;
(void) argc;
(void) argv;
srand(time(NULL)); srand(time(NULL));
count = (rand()%384) + 64; count = (rand()%384) + 64;

View File

@ -47,6 +47,8 @@ int main(int argc,char * argv[])
int env_oflags; int env_oflags;
struct stat db_stat, exe_stat; struct stat db_stat, exe_stat;
(void) argc;
(void) argv;
srand(time(NULL)); srand(time(NULL));
memset(sval, 0, sizeof(sval)); memset(sval, 0, sizeof(sval));

View File

@ -47,6 +47,8 @@ int main(int argc,char * argv[])
int env_oflags; int env_oflags;
struct stat db_stat, exe_stat; struct stat db_stat, exe_stat;
(void) argc;
(void) argv;
memset(sval, 0, sizeof(sval)); memset(sval, 0, sizeof(sval));
count = 510; count = 510;

View File

@ -47,6 +47,8 @@ int main(int argc,char * argv[])
int env_oflags; int env_oflags;
struct stat db_stat, exe_stat; struct stat db_stat, exe_stat;
(void) argc;
(void) argv;
srand(time(NULL)); srand(time(NULL));
memset(sval, 0, sizeof(sval)); memset(sval, 0, sizeof(sval));

View File

@ -47,6 +47,8 @@ int main(int argc,char * argv[])
int env_oflags; int env_oflags;
struct stat db_stat, exe_stat; struct stat db_stat, exe_stat;
(void) argc;
(void) argv;
srand(time(NULL)); srand(time(NULL));
E(mdb_env_create(&env)); E(mdb_env_create(&env));

View File

@ -70,6 +70,9 @@
#ifndef __hot #ifndef __hot
# if defined(NDEBUG) && (defined(__GNUC__) && !defined(__clang__)) # if defined(NDEBUG) && (defined(__GNUC__) && !defined(__clang__))
# define __hot __attribute__((hot, optimize("O3"))) # define __hot __attribute__((hot, optimize("O3")))
# elif defined(__GNUC__)
/* cland case, just put frequently used functions in separate section */
# define __hot __attribute__((section("text.hot")))
# else # else
# define __hot # define __hot
# endif # endif
@ -80,7 +83,7 @@
# define __cold __attribute__((cold, optimize("Os"))) # define __cold __attribute__((cold, optimize("Os")))
# elif defined(__GNUC__) # elif defined(__GNUC__)
/* cland case, just put infrequently used functions in separate section */ /* cland case, just put infrequently used functions in separate section */
# define __cold __attribute__((section("text.cold"))) # define __cold __attribute__((section("text.unlikely")))
# else # else
# define __cold # define __cold
# endif # endif

View File

@ -205,6 +205,9 @@ static void wbench(int flags, int mb, int count, int salt)
int main(int argc,char * argv[]) int main(int argc,char * argv[])
{ {
(void) argc;
(void) argv;
#define SALT 1 #define SALT 1
#define COUNT 10000 #define COUNT 10000
#define SIZE 12 #define SIZE 12

View File

@ -10,7 +10,7 @@
#include <assert.h> #include <assert.h>
#include "mdbx.h" #include "mdbx.h"
#define IP_PRINTF_ARG_HOST(addr) (int)((addr) >> 24), (int)((addr) >> 16 & 0xff), (int)((addr) >> 8 & 0xff), (int)((addr) & 0xff) #define IP_PRINTF_ARG_HOST(addr) (int)((addr) >> 24), (int)((addr) >> 16 & 0xff), (int)((addr) >> 8 & 0xff), (int)((addr) & 0xff)
char opt_db_path[PATH_MAX] = "/dev/shm/lmdb_bench1"; char opt_db_path[PATH_MAX] = "/dev/shm/lmdb_bench1";
static MDB_env *env; static MDB_env *env;
@ -23,74 +23,74 @@ int64_t lmdb_del = 0;
int64_t obj_id = 0; int64_t obj_id = 0;
static void add_id_to_pool(int64_t id) { static void add_id_to_pool(int64_t id) {
ids[ids_count] = id; ids[ids_count] = id;
ids_count++; ids_count++;
} }
static inline int64_t getTimeMicroseconds(void) { static inline int64_t getTimeMicroseconds(void) {
struct timeval val; struct timeval val;
gettimeofday(&val, NULL); gettimeofday(&val, NULL);
return val.tv_sec * ((int64_t) 1000000) + val.tv_usec; return val.tv_sec * ((int64_t) 1000000) + val.tv_usec;
} }
static int64_t get_id_from_pool() { static int64_t get_id_from_pool() {
if (ids_count == 0) { if (ids_count == 0) {
return -1; return -1;
} }
int32_t index = rand() % ids_count; int32_t index = rand() % ids_count;
int64_t id = ids[index]; int64_t id = ids[index];
ids[index] = ids[ids_count - 1]; ids[index] = ids[ids_count - 1];
ids_count--; ids_count--;
return id; return id;
} }
#define LMDB_CHECK(x) \ #define LMDB_CHECK(x) \
do {\ do {\
const int rc = (x);\ const int rc = (x);\
if ( rc != MDB_SUCCESS ) {\ if ( rc != MDB_SUCCESS ) {\
printf("Error [%d] %s in %s at %s:%d\n", rc, mdb_strerror(rc), #x, __FILE__, __LINE__); \ printf("Error [%d] %s in %s at %s:%d\n", rc, mdb_strerror(rc), #x, __FILE__, __LINE__); \
exit(EXIT_FAILURE); \ exit(EXIT_FAILURE); \
}\ }\
} while(0) } while(0)
static void db_connect() { static void db_connect() {
LMDB_CHECK(mdb_env_create(&env)); LMDB_CHECK(mdb_env_create(&env));
LMDB_CHECK(mdb_env_set_mapsize(env, 3L * 1024L * 1024L * 1024L)); LMDB_CHECK(mdb_env_set_mapsize(env, 3L * 1024L * 1024L * 1024L));
LMDB_CHECK(mdb_env_set_maxdbs(env, 30)); LMDB_CHECK(mdb_env_set_maxdbs(env, 30));
#if defined(MDB_LIFORECLAIM) #if defined(MDB_LIFORECLAIM)
LMDB_CHECK(mdb_env_open(env, opt_db_path, MDB_CREATE | MDB_NOSYNC | MDB_WRITEMAP | MDB_LIFORECLAIM, 0664)); LMDB_CHECK(mdb_env_open(env, opt_db_path, MDB_CREATE | MDB_NOSYNC | MDB_WRITEMAP | MDB_LIFORECLAIM, 0664));
#else #else
LMDB_CHECK(mdb_env_open(env, opt_db_path, MDB_CREATE | MDB_NOSYNC | MDB_WRITEMAP, 0664)); LMDB_CHECK(mdb_env_open(env, opt_db_path, MDB_CREATE | MDB_NOSYNC | MDB_WRITEMAP, 0664));
#endif #endif
printf("Connection open\n"); printf("Connection open\n");
} }
typedef struct { typedef struct {
char session_id1[100]; char session_id1[100];
char session_id2[100]; char session_id2[100];
char ip[20]; char ip[20];
uint8_t fill[100]; uint8_t fill[100];
} session_data_t; } session_data_t;
typedef struct { typedef struct {
int64_t obj_id; int64_t obj_id;
int8_t event_type; int8_t event_type;
} __attribute__((__packed__)) event_data_t; } __attribute__((__packed__)) event_data_t;
static void create_record(int64_t record_id) { static void create_record(int64_t record_id) {
MDB_dbi dbi_session; MDB_dbi dbi_session;
MDB_dbi dbi_session_id; MDB_dbi dbi_session_id;
MDB_dbi dbi_event; MDB_dbi dbi_event;
MDB_dbi dbi_ip; MDB_dbi dbi_ip;
event_data_t event; event_data_t event;
MDB_txn *txn; MDB_txn *txn;
session_data_t data; session_data_t data;
// transaction init // transaction init
snprintf(data.session_id1, sizeof (data.session_id1), "mskugw%02ld_%02ld.gx.yota.ru;3800464060;4152;%ld", record_id % 3 + 1, record_id % 9 + 1, record_id); snprintf(data.session_id1, sizeof (data.session_id1), "mskugw%02ld_%02ld.gx.yota.ru;3800464060;4152;%ld", record_id % 3 + 1, record_id % 9 + 1, record_id);
snprintf(data.session_id2, sizeof (data.session_id2), "gx_service;%ld;%ld;node@spb-jsm1", record_id, record_id % 1000000000 + 99999); snprintf(data.session_id2, sizeof (data.session_id2), "gx_service;%ld;%ld;node@spb-jsm1", record_id, record_id % 1000000000 + 99999);
snprintf(data.ip, sizeof (data.ip), "%d.%d.%d.%d", IP_PRINTF_ARG_HOST(record_id & 0xFFFFFFFF)); snprintf(data.ip, sizeof (data.ip), "%d.%d.%d.%d", IP_PRINTF_ARG_HOST(record_id & 0xFFFFFFFF));
event.obj_id = record_id; event.obj_id = record_id;
event.event_type = 1; event.event_type = 1;
MDB_val _session_id1_rec = {data.session_id1, strlen(data.session_id1)}; MDB_val _session_id1_rec = {data.session_id1, strlen(data.session_id1)};
MDB_val _session_id2_rec = {data.session_id2, strlen(data.session_id2)}; MDB_val _session_id2_rec = {data.session_id2, strlen(data.session_id2)};
@ -99,139 +99,142 @@ static void create_record(int64_t record_id) {
MDB_val _data_rec = {&data, offsetof(session_data_t, fill) + (rand() % sizeof (data.fill))}; MDB_val _data_rec = {&data, offsetof(session_data_t, fill) + (rand() % sizeof (data.fill))};
MDB_val _event_rec = {&event, sizeof (event)}; MDB_val _event_rec = {&event, sizeof (event)};
LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn)); LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn));
LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session)); LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session));
LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id)); LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id));
LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event)); LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event));
LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip)); LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip));
LMDB_CHECK(mdb_put(txn, dbi_session, &_obj_id_rec, &_data_rec, MDB_NOOVERWRITE | MDB_NODUPDATA)); LMDB_CHECK(mdb_put(txn, dbi_session, &_obj_id_rec, &_data_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_session_id, &_session_id1_rec, &_obj_id_rec, MDB_NOOVERWRITE | MDB_NODUPDATA)); LMDB_CHECK(mdb_put(txn, dbi_session_id, &_session_id1_rec, &_obj_id_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_session_id, &_session_id2_rec, &_obj_id_rec, MDB_NOOVERWRITE | MDB_NODUPDATA)); LMDB_CHECK(mdb_put(txn, dbi_session_id, &_session_id2_rec, &_obj_id_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_ip, &_ip_rec, &_obj_id_rec, 0)); LMDB_CHECK(mdb_put(txn, dbi_ip, &_ip_rec, &_obj_id_rec, 0));
LMDB_CHECK(mdb_put(txn, dbi_event, &_event_rec, &_obj_id_rec, 0)); LMDB_CHECK(mdb_put(txn, dbi_event, &_event_rec, &_obj_id_rec, 0));
// transaction commit // transaction commit
LMDB_CHECK(mdb_txn_commit(txn)); LMDB_CHECK(mdb_txn_commit(txn));
lmdb_add++; lmdb_add++;
} }
static void delete_record(int64_t record_id) { static void delete_record(int64_t record_id) {
MDB_dbi dbi_session; MDB_dbi dbi_session;
MDB_dbi dbi_session_id; MDB_dbi dbi_session_id;
MDB_dbi dbi_event; MDB_dbi dbi_event;
MDB_dbi dbi_ip; MDB_dbi dbi_ip;
event_data_t event; event_data_t event;
MDB_txn *txn; MDB_txn *txn;
// transaction init // transaction init
LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn)); LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn));
// open database in read-write mode // open database in read-write mode
LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session)); LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session));
LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id)); LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id));
LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event)); LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event));
LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip)); LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip));
// put data // put data
MDB_val _obj_id_rec = {&record_id, sizeof(record_id)}; MDB_val _obj_id_rec = {&record_id, sizeof(record_id)};
MDB_val v_rec; MDB_val v_rec;
// get data // get data
LMDB_CHECK(mdb_get(txn, dbi_session, &_obj_id_rec, &v_rec)); LMDB_CHECK(mdb_get(txn, dbi_session, &_obj_id_rec, &v_rec));
session_data_t* data = (session_data_t*) v_rec.mv_data; session_data_t* data = (session_data_t*) v_rec.mv_data;
MDB_val _session_id1_rec = {data->session_id1, strlen(data->session_id1)}; MDB_val _session_id1_rec = {data->session_id1, strlen(data->session_id1)};
MDB_val _session_id2_rec = {data->session_id2, strlen(data->session_id2)}; MDB_val _session_id2_rec = {data->session_id2, strlen(data->session_id2)};
MDB_val _ip_rec = {data->ip, strlen(data->ip)}; MDB_val _ip_rec = {data->ip, strlen(data->ip)};
LMDB_CHECK(mdb_del(txn, dbi_session_id, &_session_id1_rec, NULL)); LMDB_CHECK(mdb_del(txn, dbi_session_id, &_session_id1_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_session_id, &_session_id2_rec, NULL)); LMDB_CHECK(mdb_del(txn, dbi_session_id, &_session_id2_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_ip, &_ip_rec, NULL)); LMDB_CHECK(mdb_del(txn, dbi_ip, &_ip_rec, NULL));
event.obj_id = record_id; event.obj_id = record_id;
event.event_type = 1; event.event_type = 1;
MDB_val _event_rec = {&event, sizeof(event)}; MDB_val _event_rec = {&event, sizeof(event)};
LMDB_CHECK(mdb_del(txn, dbi_event, &_event_rec, NULL)); LMDB_CHECK(mdb_del(txn, dbi_event, &_event_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_session, &_obj_id_rec, NULL)); LMDB_CHECK(mdb_del(txn, dbi_session, &_obj_id_rec, NULL));
// transaction commit // transaction commit
LMDB_CHECK(mdb_txn_commit(txn)); LMDB_CHECK(mdb_txn_commit(txn));
lmdb_del++; lmdb_del++;
} }
static void db_disconnect() { static void db_disconnect() {
mdb_env_close(env); mdb_env_close(env);
printf("Connection closed\n"); printf("Connection closed\n");
} }
static void get_db_stat(const char* db, int64_t* ms_branch_pages, int64_t* ms_leaf_pages) { static void get_db_stat(const char* db, int64_t* ms_branch_pages, int64_t* ms_leaf_pages) {
MDB_txn *txn; MDB_txn *txn;
MDB_stat stat; MDB_stat stat;
MDB_dbi dbi; MDB_dbi dbi;
LMDB_CHECK(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn)); LMDB_CHECK(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));
LMDB_CHECK(mdb_dbi_open(txn, db, MDB_CREATE, &dbi)); LMDB_CHECK(mdb_dbi_open(txn, db, MDB_CREATE, &dbi));
LMDB_CHECK(mdb_stat(txn, dbi, &stat)); LMDB_CHECK(mdb_stat(txn, dbi, &stat));
mdb_txn_abort(txn); mdb_txn_abort(txn);
printf("%15s | %15ld | %5u | %10ld | %10ld | %11ld |\n", printf("%15s | %15ld | %5u | %10ld | %10ld | %11ld |\n",
db, db,
stat.ms_branch_pages, stat.ms_branch_pages,
stat.ms_depth, stat.ms_depth,
stat.ms_entries, stat.ms_entries,
stat.ms_leaf_pages, stat.ms_leaf_pages,
stat.ms_overflow_pages); stat.ms_overflow_pages);
(*ms_branch_pages) += stat.ms_branch_pages; (*ms_branch_pages) += stat.ms_branch_pages;
(*ms_leaf_pages) += stat.ms_leaf_pages; (*ms_leaf_pages) += stat.ms_leaf_pages;
} }
static void periodic_stat(void) { static void periodic_stat(void) {
int64_t ms_branch_pages = 0; int64_t ms_branch_pages = 0;
int64_t ms_leaf_pages = 0; int64_t ms_leaf_pages = 0;
printf(" Name | ms_branch_pages | depth | entries | leaf_pages | overf_pages |\n"); printf(" Name | ms_branch_pages | depth | entries | leaf_pages | overf_pages |\n");
get_db_stat("session", &ms_branch_pages, &ms_leaf_pages); get_db_stat("session", &ms_branch_pages, &ms_leaf_pages);
get_db_stat("session_id", &ms_branch_pages, &ms_leaf_pages); get_db_stat("session_id", &ms_branch_pages, &ms_leaf_pages);
get_db_stat("event", &ms_branch_pages, &ms_leaf_pages); get_db_stat("event", &ms_branch_pages, &ms_leaf_pages);
get_db_stat("ip", &ms_branch_pages, &ms_leaf_pages); get_db_stat("ip", &ms_branch_pages, &ms_leaf_pages);
printf("%15s | %15ld | %5s | %10s | %10ld | %11s |\n", "", ms_branch_pages, "", "", ms_leaf_pages, ""); printf("%15s | %15ld | %5s | %10s | %10ld | %11s |\n", "", ms_branch_pages, "", "", ms_leaf_pages, "");
static int64_t prev_add; static int64_t prev_add;
static int64_t prev_del; static int64_t prev_del;
static int64_t t = -1; static int64_t t = -1;
if (t > 0) { if (t > 0) {
int64_t delta = getTimeMicroseconds() - t; int64_t delta = getTimeMicroseconds() - t;
printf("CPS: add %ld, delete %ld, items processed - %ld\n", (lmdb_add - prev_add)*1000000 / delta, (lmdb_del - prev_del)*1000000 / delta, obj_id); printf("CPS: add %ld, delete %ld, items processed - %ld\n", (lmdb_add - prev_add)*1000000 / delta, (lmdb_del - prev_del)*1000000 / delta, obj_id);
} }
t = getTimeMicroseconds(); t = getTimeMicroseconds();
prev_add = lmdb_add; prev_add = lmdb_add;
prev_del = lmdb_del; prev_del = lmdb_del;
} }
static void periodic_add_rec() { static void periodic_add_rec() {
int i; int i;
for (i = 0; i < 10000; i++) { for (i = 0; i < 10000; i++) {
if (ids_count <= REC_COUNT) { if (ids_count <= REC_COUNT) {
int64_t id = obj_id++; int64_t id = obj_id++;
create_record(id); create_record(id);
add_id_to_pool(id); add_id_to_pool(id);
} }
if (ids_count > REC_COUNT) { if (ids_count > REC_COUNT) {
int64_t id = get_id_from_pool(); int64_t id = get_id_from_pool();
delete_record(id); delete_record(id);
} }
} }
periodic_stat(); periodic_stat();
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
char filename[PATH_MAX]; (void) argc;
mkdir(opt_db_path, 0775); (void) argv;
strcpy(filename, opt_db_path); char filename[PATH_MAX];
strcat(filename, "/data.mdb"); mkdir(opt_db_path, 0775);
remove(filename);
strcpy(filename, opt_db_path); strcpy(filename, opt_db_path);
strcat(filename, "/lock.mdb"); strcat(filename, "/data.mdb");
remove(filename); remove(filename);
db_connect(); strcpy(filename, opt_db_path);
while (1) { strcat(filename, "/lock.mdb");
periodic_add_rec(); remove(filename);
}
db_disconnect(); db_connect();
return 0; while (1) {
periodic_add_rec();
}
db_disconnect();
return 0;
} }

View File

@ -10,7 +10,7 @@
#include <assert.h> #include <assert.h>
#include "mdbx.h" #include "mdbx.h"
#define IP_PRINTF_ARG_HOST(addr) (int)((addr) >> 24), (int)((addr) >> 16 & 0xff), (int)((addr) >> 8 & 0xff), (int)((addr) & 0xff) #define IP_PRINTF_ARG_HOST(addr) (int)((addr) >> 24), (int)((addr) >> 16 & 0xff), (int)((addr) >> 8 & 0xff), (int)((addr) & 0xff)
char opt_db_path[PATH_MAX] = "/dev/shm/lmdb_bench2"; char opt_db_path[PATH_MAX] = "/dev/shm/lmdb_bench2";
static MDB_env *env; static MDB_env *env;
@ -25,90 +25,90 @@ int64_t lmdb_data_size = 0;
int64_t lmdb_key_size = 0; int64_t lmdb_key_size = 0;
static void add_id_to_pool(int64_t id) { static void add_id_to_pool(int64_t id) {
ids[ids_count] = id; ids[ids_count] = id;
ids_count++; ids_count++;
} }
static inline int64_t getTimeMicroseconds(void) { static inline int64_t getTimeMicroseconds(void) {
struct timeval val; struct timeval val;
gettimeofday(&val, NULL); gettimeofday(&val, NULL);
return val.tv_sec * ((int64_t) 1000000) + val.tv_usec; return val.tv_sec * ((int64_t) 1000000) + val.tv_usec;
} }
static int64_t get_id_from_pool() { static int64_t get_id_from_pool() {
if (ids_count == 0) { if (ids_count == 0) {
return -1; return -1;
} }
int32_t index = rand() % ids_count; int32_t index = rand() % ids_count;
int64_t id = ids[index]; int64_t id = ids[index];
ids[index] = ids[ids_count - 1]; ids[index] = ids[ids_count - 1];
ids_count--; ids_count--;
return id; return id;
} }
#define LMDB_CHECK(x) \ #define LMDB_CHECK(x) \
do {\ do {\
const int rc = (x);\ const int rc = (x);\
if ( rc != MDB_SUCCESS ) {\ if ( rc != MDB_SUCCESS ) {\
printf("Error [%d] %s in %s at %s:%d\n", rc, mdb_strerror(rc), #x, __FILE__, __LINE__); \ printf("Error [%d] %s in %s at %s:%d\n", rc, mdb_strerror(rc), #x, __FILE__, __LINE__); \
exit(EXIT_FAILURE); \ exit(EXIT_FAILURE); \
}\ }\
} while(0) } while(0)
static void db_connect() { static void db_connect() {
MDB_dbi dbi_session; MDB_dbi dbi_session;
MDB_dbi dbi_session_id; MDB_dbi dbi_session_id;
MDB_dbi dbi_event; MDB_dbi dbi_event;
MDB_dbi dbi_ip; MDB_dbi dbi_ip;
LMDB_CHECK(mdb_env_create(&env)); LMDB_CHECK(mdb_env_create(&env));
LMDB_CHECK(mdb_env_set_mapsize(env, 300000L * 4096L)); LMDB_CHECK(mdb_env_set_mapsize(env, 300000L * 4096L));
LMDB_CHECK(mdb_env_set_maxdbs(env, 30)); LMDB_CHECK(mdb_env_set_maxdbs(env, 30));
#if defined(MDB_LIFORECLAIM) #if defined(MDB_LIFORECLAIM)
LMDB_CHECK(mdb_env_open(env, opt_db_path, MDB_CREATE | MDB_NOSYNC | MDB_WRITEMAP | MDB_LIFORECLAIM, 0664)); LMDB_CHECK(mdb_env_open(env, opt_db_path, MDB_CREATE | MDB_NOSYNC | MDB_WRITEMAP | MDB_LIFORECLAIM, 0664));
#else #else
LMDB_CHECK(mdb_env_open(env, opt_db_path, MDB_CREATE | MDB_NOSYNC | MDB_WRITEMAP, 0664)); LMDB_CHECK(mdb_env_open(env, opt_db_path, MDB_CREATE | MDB_NOSYNC | MDB_WRITEMAP, 0664));
#endif #endif
MDB_txn *txn; MDB_txn *txn;
// transaction init // transaction init
LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn)); LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn));
// open database in read-write mode // open database in read-write mode
LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session)); LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session));
LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id)); LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id));
LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event)); LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event));
LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip)); LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip));
// transaction commit // transaction commit
LMDB_CHECK(mdb_txn_commit(txn)); LMDB_CHECK(mdb_txn_commit(txn));
printf("Connection open\n"); printf("Connection open\n");
} }
typedef struct { typedef struct {
char session_id1[100]; char session_id1[100];
char session_id2[100]; char session_id2[100];
char ip[20]; char ip[20];
uint8_t fill[100]; uint8_t fill[100];
} session_data_t; } session_data_t;
typedef struct { typedef struct {
int64_t obj_id; int64_t obj_id;
int8_t event_type; int8_t event_type;
} __attribute__((__packed__)) event_data_t; } __attribute__((__packed__)) event_data_t;
static void create_record(int64_t record_id) { static void create_record(int64_t record_id) {
MDB_dbi dbi_session; MDB_dbi dbi_session;
MDB_dbi dbi_session_id; MDB_dbi dbi_session_id;
MDB_dbi dbi_event; MDB_dbi dbi_event;
MDB_dbi dbi_ip; MDB_dbi dbi_ip;
event_data_t event; event_data_t event;
MDB_txn *txn; MDB_txn *txn;
session_data_t data; session_data_t data;
// transaction init // transaction init
snprintf(data.session_id1, sizeof (data.session_id1), "mskugw%02ld_%02ld.gx.yota.ru;3800464060;4152;%ld", record_id % 3 + 1, record_id % 9 + 1, record_id); snprintf(data.session_id1, sizeof (data.session_id1), "mskugw%02ld_%02ld.gx.yota.ru;3800464060;4152;%ld", record_id % 3 + 1, record_id % 9 + 1, record_id);
snprintf(data.session_id2, sizeof (data.session_id2), "gx_service;%ld;%ld;node@spb-jsm1", record_id, record_id % 1000000000 + 99999); snprintf(data.session_id2, sizeof (data.session_id2), "gx_service;%ld;%ld;node@spb-jsm1", record_id, record_id % 1000000000 + 99999);
snprintf(data.ip, sizeof (data.ip), "%d.%d.%d.%d", IP_PRINTF_ARG_HOST(record_id & 0xFFFFFFFF)); snprintf(data.ip, sizeof (data.ip), "%d.%d.%d.%d", IP_PRINTF_ARG_HOST(record_id & 0xFFFFFFFF));
event.obj_id = record_id; event.obj_id = record_id;
event.event_type = 1; event.event_type = 1;
MDB_val _session_id1_rec = {data.session_id1, strlen(data.session_id1)}; MDB_val _session_id1_rec = {data.session_id1, strlen(data.session_id1)};
MDB_val _session_id2_rec = {data.session_id2, strlen(data.session_id2)}; MDB_val _session_id2_rec = {data.session_id2, strlen(data.session_id2)};
@ -117,171 +117,174 @@ static void create_record(int64_t record_id) {
MDB_val _data_rec = {&data, offsetof(session_data_t, fill) + (rand() % sizeof (data.fill))}; MDB_val _data_rec = {&data, offsetof(session_data_t, fill) + (rand() % sizeof (data.fill))};
MDB_val _event_rec = {&event, sizeof(event)}; MDB_val _event_rec = {&event, sizeof(event)};
LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn)); LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn));
LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session)); LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session));
LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id)); LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id));
LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event)); LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event));
LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip)); LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip));
LMDB_CHECK(mdb_put(txn, dbi_session, &_obj_id_rec, &_data_rec, MDB_NOOVERWRITE | MDB_NODUPDATA)); LMDB_CHECK(mdb_put(txn, dbi_session, &_obj_id_rec, &_data_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_session_id, &_session_id1_rec, &_obj_id_rec, MDB_NOOVERWRITE | MDB_NODUPDATA)); LMDB_CHECK(mdb_put(txn, dbi_session_id, &_session_id1_rec, &_obj_id_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_session_id, &_session_id2_rec, &_obj_id_rec, MDB_NOOVERWRITE | MDB_NODUPDATA)); LMDB_CHECK(mdb_put(txn, dbi_session_id, &_session_id2_rec, &_obj_id_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_ip, &_ip_rec, &_obj_id_rec, 0)); LMDB_CHECK(mdb_put(txn, dbi_ip, &_ip_rec, &_obj_id_rec, 0));
LMDB_CHECK(mdb_put(txn, dbi_event, &_event_rec, &_obj_id_rec, 0)); LMDB_CHECK(mdb_put(txn, dbi_event, &_event_rec, &_obj_id_rec, 0));
lmdb_data_size += (_data_rec.mv_size + _obj_id_rec.mv_size * 4); lmdb_data_size += (_data_rec.mv_size + _obj_id_rec.mv_size * 4);
lmdb_key_size += (_obj_id_rec.mv_size + _session_id1_rec.mv_size + _session_id2_rec.mv_size + _ip_rec.mv_size + _event_rec.mv_size); lmdb_key_size += (_obj_id_rec.mv_size + _session_id1_rec.mv_size + _session_id2_rec.mv_size + _ip_rec.mv_size + _event_rec.mv_size);
// transaction commit // transaction commit
LMDB_CHECK(mdb_txn_commit(txn)); LMDB_CHECK(mdb_txn_commit(txn));
lmdb_add++; lmdb_add++;
} }
static void delete_record(int64_t record_id) { static void delete_record(int64_t record_id) {
MDB_dbi dbi_session; MDB_dbi dbi_session;
MDB_dbi dbi_session_id; MDB_dbi dbi_session_id;
MDB_dbi dbi_event; MDB_dbi dbi_event;
MDB_dbi dbi_ip; MDB_dbi dbi_ip;
event_data_t event; event_data_t event;
MDB_txn *txn; MDB_txn *txn;
// transaction init // transaction init
LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn)); LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn));
// open database in read-write mode // open database in read-write mode
LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session)); LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session));
LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id)); LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id));
LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event)); LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event));
LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip)); LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip));
// put data // put data
MDB_val _obj_id_rec = {&record_id, sizeof(record_id)}; MDB_val _obj_id_rec = {&record_id, sizeof(record_id)};
MDB_val _data_rec; MDB_val _data_rec;
// get data // get data
LMDB_CHECK(mdb_get(txn, dbi_session, &_obj_id_rec, &_data_rec)); LMDB_CHECK(mdb_get(txn, dbi_session, &_obj_id_rec, &_data_rec));
session_data_t* data = (session_data_t*) _data_rec.mv_data; session_data_t* data = (session_data_t*) _data_rec.mv_data;
MDB_val _session_id1_rec = {data->session_id1, strlen(data->session_id1)}; MDB_val _session_id1_rec = {data->session_id1, strlen(data->session_id1)};
MDB_val _session_id2_rec = {data->session_id2, strlen(data->session_id2)}; MDB_val _session_id2_rec = {data->session_id2, strlen(data->session_id2)};
MDB_val _ip_rec = {data->ip, strlen(data->ip)}; MDB_val _ip_rec = {data->ip, strlen(data->ip)};
LMDB_CHECK(mdb_del(txn, dbi_session_id, &_session_id1_rec, NULL)); LMDB_CHECK(mdb_del(txn, dbi_session_id, &_session_id1_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_session_id, &_session_id2_rec, NULL)); LMDB_CHECK(mdb_del(txn, dbi_session_id, &_session_id2_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_ip, &_ip_rec, NULL)); LMDB_CHECK(mdb_del(txn, dbi_ip, &_ip_rec, NULL));
event.obj_id = record_id; event.obj_id = record_id;
event.event_type = 1; event.event_type = 1;
MDB_val _event_rec = {&event, sizeof(event)}; MDB_val _event_rec = {&event, sizeof(event)};
LMDB_CHECK(mdb_del(txn, dbi_event, &_event_rec, NULL)); LMDB_CHECK(mdb_del(txn, dbi_event, &_event_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_session, &_obj_id_rec, NULL)); LMDB_CHECK(mdb_del(txn, dbi_session, &_obj_id_rec, NULL));
lmdb_data_size -= (_data_rec.mv_size + _obj_id_rec.mv_size * 4); lmdb_data_size -= (_data_rec.mv_size + _obj_id_rec.mv_size * 4);
lmdb_key_size -= (_obj_id_rec.mv_size + _session_id1_rec.mv_size + _session_id2_rec.mv_size + _ip_rec.mv_size + _event_rec.mv_size); lmdb_key_size -= (_obj_id_rec.mv_size + _session_id1_rec.mv_size + _session_id2_rec.mv_size + _ip_rec.mv_size + _event_rec.mv_size);
// transaction commit // transaction commit
LMDB_CHECK(mdb_txn_commit(txn)); LMDB_CHECK(mdb_txn_commit(txn));
lmdb_del++; lmdb_del++;
} }
static void db_disconnect() { static void db_disconnect() {
mdb_env_close(env); mdb_env_close(env);
printf("Connection closed\n"); printf("Connection closed\n");
} }
static void get_db_stat(const char* db, int64_t* ms_branch_pages, int64_t* ms_leaf_pages) { static void get_db_stat(const char* db, int64_t* ms_branch_pages, int64_t* ms_leaf_pages) {
MDB_txn *txn; MDB_txn *txn;
MDB_stat stat; MDB_stat stat;
MDB_dbi dbi; MDB_dbi dbi;
LMDB_CHECK(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn)); LMDB_CHECK(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));
LMDB_CHECK(mdb_dbi_open(txn, db, MDB_CREATE, &dbi)); LMDB_CHECK(mdb_dbi_open(txn, db, MDB_CREATE, &dbi));
LMDB_CHECK(mdb_stat(txn, dbi, &stat)); LMDB_CHECK(mdb_stat(txn, dbi, &stat));
mdb_txn_abort(txn); mdb_txn_abort(txn);
printf("%15s | %15ld | %5u | %10ld | %10ld | %11ld |\n", printf("%15s | %15ld | %5u | %10ld | %10ld | %11ld |\n",
db, db,
stat.ms_branch_pages, stat.ms_branch_pages,
stat.ms_depth, stat.ms_depth,
stat.ms_entries, stat.ms_entries,
stat.ms_leaf_pages, stat.ms_leaf_pages,
stat.ms_overflow_pages); stat.ms_overflow_pages);
(*ms_branch_pages) += stat.ms_branch_pages; (*ms_branch_pages) += stat.ms_branch_pages;
(*ms_leaf_pages) += stat.ms_leaf_pages; (*ms_leaf_pages) += stat.ms_leaf_pages;
} }
static void periodic_stat(void) { static void periodic_stat(void) {
int64_t ms_branch_pages = 0; int64_t ms_branch_pages = 0;
int64_t ms_leaf_pages = 0; int64_t ms_leaf_pages = 0;
printf(" Name | ms_branch_pages | depth | entries | leaf_pages | overf_pages |\n"); printf(" Name | ms_branch_pages | depth | entries | leaf_pages | overf_pages |\n");
get_db_stat("session", &ms_branch_pages, &ms_leaf_pages); get_db_stat("session", &ms_branch_pages, &ms_leaf_pages);
get_db_stat("session_id", &ms_branch_pages, &ms_leaf_pages); get_db_stat("session_id", &ms_branch_pages, &ms_leaf_pages);
get_db_stat("event", &ms_branch_pages, &ms_leaf_pages); get_db_stat("event", &ms_branch_pages, &ms_leaf_pages);
get_db_stat("ip", &ms_branch_pages, &ms_leaf_pages); get_db_stat("ip", &ms_branch_pages, &ms_leaf_pages);
printf("%15s | %15ld | %5s | %10s | %10ld | %11s |\n", "", ms_branch_pages, "", "", ms_leaf_pages, ""); printf("%15s | %15ld | %5s | %10s | %10ld | %11s |\n", "", ms_branch_pages, "", "", ms_leaf_pages, "");
static int64_t prev_add; static int64_t prev_add;
static int64_t prev_del; static int64_t prev_del;
static int64_t t = -1; static int64_t t = -1;
if (t > 0) { if (t > 0) {
int64_t delta = getTimeMicroseconds() - t; int64_t delta = getTimeMicroseconds() - t;
printf("CPS: add %ld, delete %ld, items processed - %ldK data=%ldK key=%ldK\n", (lmdb_add - prev_add)*1000000 / delta, (lmdb_del - prev_del)*1000000 / delta, obj_id / 1024, lmdb_data_size / 1024, lmdb_key_size / 1024); printf("CPS: add %ld, delete %ld, items processed - %ldK data=%ldK key=%ldK\n", (lmdb_add - prev_add)*1000000 / delta, (lmdb_del - prev_del)*1000000 / delta, obj_id / 1024, lmdb_data_size / 1024, lmdb_key_size / 1024);
printf("usage data=%ld%%\n", ((lmdb_data_size + lmdb_key_size) * 100) / ((ms_leaf_pages + ms_branch_pages)*4096)); printf("usage data=%ld%%\n", ((lmdb_data_size + lmdb_key_size) * 100) / ((ms_leaf_pages + ms_branch_pages)*4096));
} }
t = getTimeMicroseconds(); t = getTimeMicroseconds();
prev_add = lmdb_add; prev_add = lmdb_add;
prev_del = lmdb_del; prev_del = lmdb_del;
} }
//static void periodic_add_rec() { //static void periodic_add_rec() {
// for (int i = 0; i < 10240; i++) { // for (int i = 0; i < 10240; i++) {
// if (ids_count <= REC_COUNT) { // if (ids_count <= REC_COUNT) {
// int64_t id = obj_id++; // int64_t id = obj_id++;
// create_record(id); // create_record(id);
// add_id_to_pool(id); // add_id_to_pool(id);
// } // }
// if (ids_count > REC_COUNT) { // if (ids_count > REC_COUNT) {
// int64_t id = get_id_from_pool(); // int64_t id = get_id_from_pool();
// delete_record(id); // delete_record(id);
// } // }
// } // }
// periodic_stat(); // periodic_stat();
//} //}
int main(int argc, char** argv) { int main(int argc, char** argv) {
char filename[PATH_MAX]; (void) argc;
int i; (void) argv;
int64_t t;
mkdir(opt_db_path, 0775); char filename[PATH_MAX];
int i;
int64_t t;
strcpy(filename, opt_db_path); mkdir(opt_db_path, 0775);
strcat(filename, "/data.mdb");
remove(filename);
strcpy(filename, opt_db_path); strcpy(filename, opt_db_path);
strcat(filename, "/lock.mdb"); strcat(filename, "/data.mdb");
remove(filename); remove(filename);
db_connect(); strcpy(filename, opt_db_path);
periodic_stat(); strcat(filename, "/lock.mdb");
for (i = 0; i < 1024000; i++) { remove(filename);
int64_t id = obj_id++;
create_record(id); db_connect();
add_id_to_pool(id); periodic_stat();
} for (i = 0; i < 1024000; i++) {
periodic_stat(); int64_t id = obj_id++;
t = getTimeMicroseconds(); create_record(id);
while (1) { add_id_to_pool(id);
int i; }
int64_t now; periodic_stat();
for (i = 0; i < 100; i++) { t = getTimeMicroseconds();
int64_t id = obj_id++; while (1) {
create_record(id); int i;
add_id_to_pool(id); int64_t now;
id = get_id_from_pool(); for (i = 0; i < 100; i++) {
delete_record(id); int64_t id = obj_id++;
} create_record(id);
//int64_t id = obj_id++; add_id_to_pool(id);
//create_record(id); id = get_id_from_pool();
//add_id_to_pool(id); delete_record(id);
now = getTimeMicroseconds(); }
if ((now - t) > 100000) { //int64_t id = obj_id++;
periodic_stat(); //create_record(id);
t = now; //add_id_to_pool(id);
} now = getTimeMicroseconds();
} if ((now - t) > 100000) {
db_disconnect(); periodic_stat();
return 0; t = now;
}
}
db_disconnect();
return 0;
} }