mdbx: Merge branch 'master' into nexenta.

This commit is contained in:
Leo Yuriev 2016-04-28 08:21:43 +03:00
commit 0a65b26c58
18 changed files with 563 additions and 482 deletions

View File

@ -18,8 +18,8 @@ prefix ?= /usr/local
mandir ?= $(prefix)/man
CC ?= gcc
XCFLAGS ?=
CFLAGS ?= -O2 -ggdb3 -Wall -Werror -DNDEBUG=1
XCFLAGS ?= -DNDEBUG=1 -DMDB_DEBUG=0
CFLAGS ?= -O2 -g3 -Wall -Werror -Wextra
CFLAGS += -pthread $(XCFLAGS)
IOARENA ?= ../ioarena.git/@BUILD/src/ioarena
@ -76,7 +76,7 @@ libmdbx.a: mdbx.o
$(AR) rs $@ $^
libmdbx.so: mdbx.lo
$(CC) $(CFLAGS) $(LDFLAGS) -pthread -shared -o $@ $^
$(CC) $(CFLAGS) $(LDFLAGS) -save-temps -pthread -shared -o $@ $^
liblmdb.a: lmdb.o
$(AR) rs $@ $^
@ -168,23 +168,28 @@ ifneq ($(wildcard $(IOARENA)),)
.PHONY: bench clean-bench re-bench
bench: bench-lmdb.txt bench-mdbx.txt
clean-bench:
rm -rf bench-*.txt _ioarena
rm -rf bench-*.txt _ioarena/*
re-bench: clean-bench bench
NN := 25000000
define bench-rule
bench-$(1).txt: $(3) $(IOARENA) Makefile
$(IOARENA) -D $(1) -B crud -m nosync -n $(2) | tee $$@ | grep throughput \
&& $(IOARENA) -D $(1) -B get,iterate -m sync -r 4 -n $(2) | tee -a $$@ | grep throughput \
|| rm -f $$@
endef
$(eval $(call bench-rule,mdbx,$(NN),libmdbx.so))
$(eval $(call bench-rule,lmdb,$(NN)))
$(eval $(call bench-rule,dummy,$(NN)))
$(eval $(call bench-rule,debug,10))
bench: bench-lmdb.txt bench-mdbx.txt
bench-mdbx.txt: libmdbx.so $(IOARENA)
$(IOARENA) -D mdbx -B crud -m nosync -n 10000000 | tee $@ \
&& $(IOARENA) -D mdbx -B get,iterate -m sync -r 4 -n 10000000 | tee -a $@ \
|| rm -f $@
bench-lmdb.txt: $(IOARENA)
$(IOARENA) -D lmdb -B crud -m nosync -n 10000000 | tee $@ \
&& $(IOARENA) -D lmdb -B get,iterate -m sync -r 4 -n 10000000 | tee -a $@ \
|| rm -f $@
endif

252
mdb.c
View File

@ -1902,7 +1902,7 @@ mdb_meta_head_r(MDB_env *env) {
} else if (likely(b->mm_txnid == head_txnid)) {
h = b;
} else {
/* LY: seems got a race with mdb_env_sync0() */
/* LY: seems got a collision with mdb_env_sync0() */
mdb_coherent_barrier();
head_txnid = env->me_txns->mti_txnid;
mdb_assert(env, a->mm_txnid != b->mm_txnid || head_txnid == 0);
@ -2114,7 +2114,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp, int flags)
goto fail;
}
for (;;) { /* oomkick retry loop */
for (;;) { /* oom-kick retry loop */
found_old = 0;
for (op = MDB_FIRST;; op = (flags & MDB_LIFORECLAIM) ? MDB_PREV : MDB_NEXT) {
MDB_val key, data;
@ -2321,6 +2321,12 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp, int flags)
* utterly no-sync write mode was requested. In such case
* don't make a steady-sync, but only a legacy-mode checkpoint,
* just for resume reclaiming only, not for data consistency. */
mdb_debug("kick-gc: head %zu/%c, tail %zu/%c, oldest %zu, txnid %zu",
head->mm_txnid, META_IS_WEAK(head) ? 'W' : 'N',
tail->mm_txnid, META_IS_WEAK(tail) ? 'W' : 'N',
oldest, env->me_txns->mt1.mtb.mtb_txnid );
int flags = env->me_flags & MDB_WRITEMAP;
if ((env->me_flags & MDB_UTTERLY_NOSYNC) == MDB_UTTERLY_NOSYNC)
flags |= MDB_UTTERLY_NOSYNC;
@ -3273,7 +3279,7 @@ static MDB_INLINE int
mdb_backlog_size(MDB_txn *txn)
{
	/* Number of pages the transaction can take without a new allocation:
	 * the first element of the environment's me_pghead list (read here as
	 * the count of reclaimed pages; 0 when the list is absent) plus the
	 * transaction's loose pages. */
	int reclaimed = txn->mt_env->me_pghead ? txn->mt_env->me_pghead[0] : 0;
	/* Plain addition: the local is not used afterwards, so a compound
	 * assignment inside the return would only obscure the expression. */
	return reclaimed + txn->mt_loose_count;
}
/* LY: Prepare a backlog of pages to modify FreeDB itself,
@ -3282,30 +3288,20 @@ mdb_backlog_size(MDB_txn *txn)
static int
mdb_prep_backlog(MDB_txn *txn, MDB_cursor *mc)
{
/* LY: Critical level (1) for copy a one leaf-page.
* But also (+2) for split leaf-page into a couple with creation
* one branch-page (for ability of insertion and my paranoia). */
int minimal_level = 3;
/* LY: extra page(s) for b-tree rebalancing */
const int extra = (txn->mt_env->me_flags & MDB_LIFORECLAIM) ? 2 : 1;
/* LY: Safe level for update branch-pages from root */
int safe_level = minimal_level + 8;
if (mdb_backlog_size(txn) < safe_level) {
/* Make sure "hot" pages of freeDB is touched and on freelist */
if (mdb_backlog_size(txn) < mc->mc_db->md_depth + extra) {
int rc = mdb_cursor_touch(mc);
if (unlikely(rc))
return rc;
while (mdb_backlog_size(txn) < minimal_level) {
MDB_page *mp = NULL;
rc = mdb_page_alloc(mc, 1, &mp, MDB_ALLOC_GC | MDB_ALLOC_NEW);
if (unlikely(rc))
return rc;
if (mp) {
NEXT_LOOSE_PAGE(mp) = txn->mt_loose_pgs;
txn->mt_loose_pgs = mp;
txn->mt_loose_count++;
mp->mp_flags |= P_LOOSE;
while (unlikely(mdb_backlog_size(txn) < extra)) {
rc = mdb_page_alloc(mc, 1, NULL, MDB_ALLOC_GC);
if (unlikely(rc)) {
if (unlikely(rc != MDB_NOTFOUND))
return rc;
break;
}
}
}
@ -3462,7 +3458,7 @@ again:
/* LY: need more just a txn-id for save page list. */
rc = mdb_page_alloc(&mc, 0, NULL, MDB_ALLOC_GC);
if (likely(rc == 0))
/* LY: ок, reclaimed from freedb. */
/* LY: ok, reclaimed from freedb. */
continue;
if (unlikely(rc != MDB_NOTFOUND))
/* LY: other troubles... */
@ -4134,17 +4130,17 @@ mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending)
int rc;
MDB_meta* head = mdb_meta_head_w(env);
size_t prev_mapsize = head->mm_mapsize;
MDB_meta* tail = META_IS_WEAK(head) ? head : mdb_env_meta_flipflop(env, head);
off_t offset = (char*) tail - env->me_map;
volatile MDB_meta* target = META_IS_WEAK(head) ? head : mdb_env_meta_flipflop(env, head);
off_t offset = (char*) target - env->me_map;
size_t used_size = env->me_psize * (pending->mm_last_pg + 1);
mdb_assert(env, (env->me_flags & (MDB_RDONLY | MDB_FATAL_ERROR)) == 0);
mdb_assert(env, META_IS_WEAK(head) || env->me_sync_pending != 0
|| env->me_mapsize != prev_mapsize);
mdb_assert(env, pending->mm_txnid > head->mm_txnid || META_IS_WEAK(head));
mdb_assert(env, pending->mm_txnid > tail->mm_txnid || META_IS_WEAK(tail));
mdb_assert(env, pending->mm_txnid > target->mm_txnid || META_IS_WEAK(target));
MDB_meta* stay = mdb_env_meta_flipflop(env, tail);
MDB_meta* stay = mdb_env_meta_flipflop(env, (MDB_meta*) target);
mdb_assert(env, pending->mm_txnid > stay->mm_txnid);
pending->mm_mapsize = env->me_mapsize;
@ -4172,7 +4168,7 @@ mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending)
if ((flags & MDB_MAPASYNC) == 0)
env->me_sync_pending = 0;
} else {
int (*sync_fd)(int fd) = fdatasync;
int (*flush)(int fd) = fdatasync;
if (unlikely(prev_mapsize != pending->mm_mapsize)) {
/* LY: It is no reason to use fdatasync() here, even in case
* no such bug in a kernel. Because "no-bug" mean that a kernel
@ -4184,9 +4180,9 @@ mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending)
*
* For more info about of a corresponding fdatasync() bug
* see http://www.spinics.net/lists/linux-ext4/msg33714.html */
sync_fd = fsync;
flush = fsync;
}
while(unlikely(sync_fd(env->me_fd) < 0)) {
while(unlikely(flush(env->me_fd) < 0)) {
rc = errno;
if (rc != EINTR)
goto undo;
@ -4203,23 +4199,27 @@ mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending)
(flags & MDB_UTTERLY_NOSYNC) == MDB_UTTERLY_NOSYNC
? MDB_DATASIGN_NONE : MDB_DATASIGN_WEAK;
}
mdb_debug("writing meta page %d for root page %zu",
offset >= env->me_psize, pending->mm_dbs[MAIN_DBI].md_root);
mdb_debug("writing meta %d, root %zu, txn_id %zu, %s",
offset >= env->me_psize, pending->mm_dbs[MAIN_DBI].md_root,
pending->mm_txnid,
META_IS_WEAK(pending) ? "Weak" : META_IS_STEADY(pending) ? "Steady" : "Legacy" );
if (env->me_flags & MDB_WRITEMAP) {
#ifdef __SANITIZE_THREAD__
pthread_mutex_lock(&tsan_mutex);
#endif
tail->mm_datasync_sign = MDB_DATASIGN_WEAK;
tail->mm_txnid = 0;
mdb_coherent_barrier();
tail->mm_mapsize = pending->mm_mapsize;
tail->mm_dbs[FREE_DBI] = pending->mm_dbs[FREE_DBI];
tail->mm_dbs[MAIN_DBI] = pending->mm_dbs[MAIN_DBI];
tail->mm_last_pg = pending->mm_last_pg;
/* (LY) ITS#7969: issue a memory barrier, it is noop for x86. */
mdb_coherent_barrier();
tail->mm_txnid = pending->mm_txnid;
tail->mm_datasync_sign = pending->mm_datasync_sign;
/* LY: 'invalidate' the meta,
* but mdb_meta_head_r() will be confused/retired in collision case. */
target->mm_datasync_sign = MDB_DATASIGN_WEAK;
target->mm_txnid = 0;
/* LY: update info */
target->mm_mapsize = pending->mm_mapsize;
target->mm_dbs[FREE_DBI] = pending->mm_dbs[FREE_DBI];
target->mm_dbs[MAIN_DBI] = pending->mm_dbs[MAIN_DBI];
target->mm_last_pg = pending->mm_last_pg;
/* LY: 'commit' the meta */
target->mm_txnid = pending->mm_txnid;
target->mm_datasync_sign = pending->mm_datasync_sign;
} else {
pending->mm_magic = MDB_MAGIC;
pending->mm_version = MDB_DATA_VERSION;
@ -4235,7 +4235,7 @@ mdb_env_sync0(MDB_env *env, unsigned flags, MDB_meta *pending)
mdb_debug("write failed, disk error?");
/* On a failure, the pagecache still contains the new data.
* Write some old data back, to prevent it from being used. */
if (pwrite(env->me_fd, tail, sizeof(MDB_meta), offset) == sizeof(MDB_meta)) {
if (pwrite(env->me_fd, (void*) target, sizeof(MDB_meta), offset) == sizeof(MDB_meta)) {
/* LY: take a chance, if write succeeds at a magic ;) */
goto retry;
}
@ -4319,7 +4319,7 @@ mdb_env_create(MDB_env **env)
}
static int __cold
mdb_env_map(MDB_env *env, void *addr)
mdb_env_map(MDB_env *env, void *addr, size_t usedsize)
{
unsigned flags = env->me_flags;
@ -4336,16 +4336,17 @@ mdb_env_map(MDB_env *env, void *addr)
return errno;
}
if (flags & MDB_NORDAHEAD) {
/* Turn off readahead. It's harmful when the DB is larger than RAM. */
if (madvise(env->me_map, env->me_mapsize, MADV_RANDOM) < 0)
return errno;
/* Can happen because the address argument to mmap() is just a
* hint. mmap() can pick another, e.g. if the range is in use.
* The MAP_FIXED flag would prevent that, but then mmap could
* instead unmap existing pages to make room for the new map.
*/
if (addr && env->me_map != addr) {
errno = 0; /* LY: clean errno as a hit for this case */
return EBUSY; /* TODO: Make a new MDB_* error code? */
}
if (madvise(env->me_map, env->me_mapsize, MADV_DONTFORK) < 0)
return errno;
if (madvise(env->me_map, env->me_mapsize, MADV_WILLNEED) < 0)
if (madvise(env->me_map, env->me_mapsize, MADV_DONTFORK))
return errno;
#ifdef MADV_NOHUGEPAGE
@ -4358,15 +4359,16 @@ mdb_env_map(MDB_env *env, void *addr)
}
#endif
/* Can happen because the address argument to mmap() is just a
* hint. mmap() can pick another, e.g. if the range is in use.
* The MAP_FIXED flag would prevent that, but then mmap could
* instead unmap existing pages to make room for the new map.
*/
if (addr && env->me_map != addr) {
errno = 0; /* LY: clean errno as a hit for this case */
return EBUSY; /* TODO: Make a new MDB_* error code? */
#ifdef MADV_REMOVE
	if (flags & MDB_WRITEMAP) {
		/* Release the part of the map beyond the used pages.
		 * The assertion refers to the 'usedsize' parameter (there is no
		 * 'used_edge' in scope, which broke builds with assertions enabled)
		 * and allows equality, since mdb_env_set_mapsize() may round the
		 * map size up to exactly the used size. */
		assert(usedsize <= env->me_mapsize);
		/* Best effort: the result is deliberately ignored. */
		(void) madvise(env->me_map + usedsize, env->me_mapsize - usedsize, MADV_REMOVE);
	}
#endif
/* Turn on/off readahead. It's harmful when the DB is larger than RAM. */
if (madvise(env->me_map, env->me_mapsize, (flags & MDB_NORDAHEAD) ? MADV_RANDOM : MADV_WILLNEED))
return errno;
/* Lock meta pages to avoid unexpected write,
* before the data pages would be synchronized. */
@ -4374,8 +4376,8 @@ mdb_env_map(MDB_env *env, void *addr)
return errno;
#ifdef USE_VALGRIND
env->me_valgrind_handle = VALGRIND_CREATE_BLOCK(
env->me_map, env->me_mapsize, "lmdb");
env->me_valgrind_handle =
VALGRIND_CREATE_BLOCK(env->me_map, env->me_mapsize, "lmdb");
#endif
return MDB_SUCCESS;
@ -4405,12 +4407,10 @@ mdb_env_set_mapsize(MDB_env *env, size_t size)
meta = mdb_meta_head_w(env);
if (!size)
size = meta->mm_mapsize;
{
/* Silently round up to minimum if the size is too small */
size_t minsize = (meta->mm_last_pg + 1) * env->me_psize;
if (size < minsize)
size = minsize;
}
/* Silently round up to minimum if the size is too small */
const size_t usedsize = (meta->mm_last_pg + 1) * env->me_psize;
if (size < usedsize)
size = usedsize;
munmap(env->me_map, env->me_mapsize);
#ifdef USE_VALGRIND
VALGRIND_DISCARD(env->me_valgrind_handle);
@ -4418,7 +4418,7 @@ mdb_env_set_mapsize(MDB_env *env, size_t size)
#endif
env->me_mapsize = size;
old = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : NULL;
rc = mdb_env_map(env, old);
rc = mdb_env_map(env, old, usedsize);
if (rc)
return rc;
}
@ -4536,7 +4536,8 @@ mdb_env_open2(MDB_env *env, MDB_meta *meta)
newenv = 0;
}
rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta->mm_address : NULL);
const size_t usedsize = (meta->mm_last_pg + 1) * env->me_psize;
rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta->mm_address : NULL, usedsize);
if (rc)
return rc;
@ -4869,6 +4870,13 @@ mdbx_env_open_ex(MDB_env *env, const char *path, unsigned flags, mode_t mode, in
if (unlikely(env->me_signature != MDBX_ME_SIGNATURE))
return MDB_VERSION_MISMATCH;
#if MDB_LIFORECLAIM
/* LY: don't allow LIFO with just NOMETASYNC */
if ((flags & (MDB_NOMETASYNC | MDB_LIFORECLAIM | MDB_NOSYNC))
== (MDB_NOMETASYNC | MDB_LIFORECLAIM))
return EINVAL;
#endif /* MDB_LIFORECLAIM */
if (env->me_fd != INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS)))
return EINVAL;
@ -5129,7 +5137,11 @@ mdb_env_close(MDB_env *env)
* | 1, a > b
* \
*/
/* Three-way comparison yielding -1, 0 or 1 without subtracting the operands,
 * so it is also correct for unsigned values.
 * NOTE: both arguments are evaluated more than once; do not pass expressions
 * with side effects.
 * The macro is defined exactly once; flip the '#if' to select the
 * alternative (branch-free) formulation. */
#if 1
#	define mdbx_cmp2int(a, b) (((b) > (a)) ? -1 : (a) > (b))
#else
#	define mdbx_cmp2int(a, b) (((a) > (b)) - ((b) > (a)))
#endif
/** Compare two items pointing at aligned unsigned int's. */
static int __hot
@ -5225,9 +5237,22 @@ mdb_cmp_int_ua(const MDB_val *a, const MDB_val *b)
/** Compare two items as byte strings of possibly different length.
 * The common prefix is compared with memcmp(); when it is equal, the shorter
 * item sorts first.
 * @param[in] a  left item (mv_data / mv_size)
 * @param[in] b  right item
 * @return < 0 if a < b, 0 if a == b, > 0 if a > b.
 */
static int __hot
mdb_cmp_memn(const MDB_val *a, const MDB_val *b)
{
	/* LY: assumes that the key lengths are NOT equal in most cases;
	 * otherwise branch prediction should mitigate the cost.
	 * With equal lengths memcmp() alone decides the result. */
	if (unlikely(a->mv_size == b->mv_size))
		return memcmp(a->mv_data, b->mv_data, a->mv_size);

	/* Lengths differ: this is the tie-breaker when the common prefix matches.
	 * (A branch-free alternative is mdbx_cmp2int(a->mv_size, b->mv_size).) */
	const int diff_len = (a->mv_size < b->mv_size) ? -1 : 1;
	const size_t shortest = (a->mv_size < b->mv_size) ? a->mv_size : b->mv_size;
	const int diff_data = memcmp(a->mv_data, b->mv_data, shortest);
	return likely(diff_data) ? diff_data : diff_len;
}
/** Compare two items in reverse byte order */
@ -5834,11 +5859,12 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
MDB_node *leaf;
int rc;
if (unlikely(mc->mc_flags & C_EOF)) {
if ((mc->mc_flags & C_EOF) ||
((mc->mc_flags & C_DEL) && op == MDB_NEXT_DUP)) {
return MDB_NOTFOUND;
}
mdb_cassert(mc, mc->mc_flags & C_INITIALIZED);
if (!(mc->mc_flags & C_INITIALIZED))
return mdb_cursor_first(mc, key, data);
mp = mc->mc_pg[mc->mc_top];
@ -5917,7 +5943,12 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
MDB_node *leaf;
int rc;
mdb_cassert(mc, mc->mc_flags & C_INITIALIZED);
if (!(mc->mc_flags & C_INITIALIZED)) {
rc = mdb_cursor_last(mc, key, data);
if (unlikely(rc))
return rc;
mc->mc_ki[mc->mc_top]++;
}
mp = mc->mc_pg[mc->mc_top];
@ -6367,10 +6398,7 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data,
rc = MDB_INCOMPATIBLE;
break;
}
if (!(mc->mc_flags & C_INITIALIZED))
rc = mdb_cursor_first(mc, key, data);
else
rc = mdb_cursor_next(mc, key, data, MDB_NEXT_DUP);
rc = mdb_cursor_next(mc, key, data, MDB_NEXT_DUP);
if (rc == MDB_SUCCESS) {
if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) {
MDB_cursor *mx;
@ -6412,21 +6440,11 @@ fetchm:
case MDB_NEXT:
case MDB_NEXT_DUP:
case MDB_NEXT_NODUP:
if (!(mc->mc_flags & C_INITIALIZED))
rc = mdb_cursor_first(mc, key, data);
else
rc = mdb_cursor_next(mc, key, data, op);
rc = mdb_cursor_next(mc, key, data, op);
break;
case MDB_PREV:
case MDB_PREV_DUP:
case MDB_PREV_NODUP:
if (!(mc->mc_flags & C_INITIALIZED)) {
rc = mdb_cursor_last(mc, key, data);
if (unlikely(rc))
break;
mc->mc_flags |= C_INITIALIZED;
mc->mc_ki[mc->mc_top]++;
}
rc = mdb_cursor_prev(mc, key, data, op);
break;
case MDB_FIRST:
@ -8473,8 +8491,6 @@ mdb_cursor_del0(MDB_cursor *mc)
if (m3->mc_pg[mc->mc_top] == mp) {
if (m3->mc_ki[mc->mc_top] == ki) {
m3->mc_flags |= C_DEL;
if (mc->mc_db->md_flags & MDB_DUPSORT)
m3->mc_xcursor->mx_cursor.mc_flags &= ~C_INITIALIZED;
} else if (m3->mc_ki[mc->mc_top] > ki) {
m3->mc_ki[mc->mc_top]--;
}
@ -8508,11 +8524,21 @@ mdb_cursor_del0(MDB_cursor *mc)
continue;
if (m3->mc_pg[mc->mc_top] == mp) {
/* if m3 points past last node in page, find next sibling */
if (m3->mc_ki[mc->mc_top] >= nkeys) {
rc = mdb_cursor_sibling(m3, 1);
if (rc == MDB_NOTFOUND) {
m3->mc_flags |= C_EOF;
rc = MDB_SUCCESS;
if (m3->mc_ki[mc->mc_top] >= mc->mc_ki[mc->mc_top]) {
if (m3->mc_ki[mc->mc_top] >= nkeys) {
rc = mdb_cursor_sibling(m3, 1);
if (rc == MDB_NOTFOUND) {
m3->mc_flags |= C_EOF;
rc = MDB_SUCCESS;
continue;
}
}
if (mc->mc_db->md_flags & MDB_DUPSORT) {
MDB_node *node = NODEPTR(m3->mc_pg[m3->mc_top], m3->mc_ki[m3->mc_top]);
if (node->mn_flags & F_DUPDATA) {
mdb_xcursor_init1(m3, node);
m3->mc_xcursor->mx_cursor.mc_flags |= C_DEL;
}
}
}
}
@ -9573,14 +9599,30 @@ mdb_env_copy(MDB_env *env, const char *path)
}
/** Set or clear environment flags at runtime.
 * @param[in] env    environment handle
 * @param[in] flags  flag bits to change; only bits within CHANGEABLE are accepted
 * @param[in] onoff  non-zero to set the bits, zero to clear them
 * @return EINVAL for a non-changeable bit or (when MDB_LIFORECLAIM is enabled)
 * for a resulting LIFO + NOMETASYNC combination without NOSYNC; the error of
 * mdb_mutex_lock() if the lock cannot be taken; MDB_SUCCESS otherwise.
 */
int __cold
mdb_env_set_flags(MDB_env *env, unsigned flags, int onoff)
{
	if (unlikely(flags & ~CHANGEABLE))
		return EINVAL;

	pthread_mutex_t *mutex = MDB_MUTEX(env, w);
	int rc = mdb_mutex_lock(env, mutex);
	if (unlikely(rc))
		return rc;

	/* Compute the resulting flag set first; it is stored only if valid. */
	if (onoff)
		flags = env->me_flags | flags;
	else
		flags = env->me_flags & ~flags;

	rc = MDB_SUCCESS;
#if MDB_LIFORECLAIM
	/* LY: don't allow LIFO with just NOMETASYNC */
	if ((flags & (MDB_NOMETASYNC | MDB_LIFORECLAIM | MDB_NOSYNC))
			== (MDB_NOMETASYNC | MDB_LIFORECLAIM))
		rc = EINVAL;
#endif /* MDB_LIFORECLAIM */

	if (rc == MDB_SUCCESS)
		env->me_flags = flags;

	/* Single exit: the mutex is released on the rejection path too
	 * (previously the EINVAL return left it locked). */
	mdb_mutex_unlock(env, mutex);
	return rc;
}

View File

@ -50,6 +50,7 @@ flagbit dbflags[] = {
static volatile sig_atomic_t gotsignal;
/* Handler-style callback: records that a signal arrived by raising the
 * 'gotsignal' flag. The signal number itself is not used.
 * (The identifier's spelling is kept as-is; it may be referenced elsewhere.) */
static void signal_hanlder(int sig)
{
	gotsignal = 1;
	(void) sig; /* unused */
}
@ -228,6 +229,8 @@ static size_t problems_pop(struct problem* list) {
static int pgvisitor(size_t pgno, unsigned pgnumber, void* ctx, const char* dbi,
const char* type, int nentries, int payload_bytes, int header_bytes, int unused_bytes)
{
(void) ctx;
if (type) {
size_t page_bytes = payload_bytes + header_bytes + unused_bytes;
size_t page_size = pgnumber * stat.base.ms_psize;
@ -250,14 +253,14 @@ static int pgvisitor(size_t pgno, unsigned pgnumber, void* ctx, const char* dbi,
problem_add("page", pgno, "illegal unused-bytes", "%zu < %i < %zu",
0, unused_bytes, stat.base.ms_psize);
if (header_bytes < sizeof(long) || header_bytes >= stat.base.ms_psize - sizeof(long))
if (header_bytes < (int) sizeof(long) || (size_t) header_bytes >= stat.base.ms_psize - sizeof(long))
problem_add("page", pgno, "illegal header-length", "%zu < %i < %zu",
sizeof(long), header_bytes, stat.base.ms_psize - sizeof(long));
if (payload_bytes < 1) {
if (nentries > 0) {
problem_add("page", pgno, "zero size-of-entry", "payload %i bytes, %i entries",
payload_bytes, nentries);
if (header_bytes + unused_bytes < page_size) {
if ((size_t) header_bytes + unused_bytes < page_size) {
/* LY: hush a misuse error */
page_bytes = page_size;
}
@ -302,6 +305,9 @@ typedef int (visitor)(size_t record_number, MDB_val *key, MDB_val* data);
static int process_db(MDB_dbi dbi, char *name, visitor *handler, int silent);
/* Record visitor that performs no checks: every argument is ignored and
 * MDB_SUCCESS is always returned. */
static int handle_userdb(size_t record_number, MDB_val *key, MDB_val* data)
{
	/* nothing to inspect; silence unused-parameter warnings */
	(void) data;
	(void) key;
	(void) record_number;

	return MDB_SUCCESS;
}
@ -366,7 +372,8 @@ static int handle_freedb(size_t record_number, MDB_val *key, MDB_val* data) {
static int handle_maindb(size_t record_number, MDB_val *key, MDB_val* data) {
char *name;
int i, rc;
int rc;
size_t i;
name = key->mv_data;
for(i = 0; i < key->mv_size; ++i) {
@ -880,7 +887,7 @@ int main(int argc, char *argv[])
print(", gc %zu (%.1f%%)", freedb_pages, freedb_pages / percent);
value = freedb_pages - reclaimable_pages;
print(", reading %zu (%.1f%%)", value, value / percent);
print(", detained %zu (%.1f%%)", value, value / percent);
print(", reclaimable %zu (%.1f%%)", reclaimable_pages, reclaimable_pages / percent);
}

View File

@ -25,6 +25,7 @@
/* Deliberately empty signal-handler-shaped function: it only consumes its
 * argument so that no unused-parameter warning is raised. */
static void sighandle(int sig) { (void) sig; }
int main(int argc,char * argv[])

View File

@ -48,7 +48,8 @@ static volatile sig_atomic_t gotsig;
/* Handler-style callback: raises the 'gotsig' flag exactly once per call;
 * the signal number is not used. */
static void dumpsig( int sig )
{
	(void) sig;
	gotsig = 1;
}
static const char hexc[] = "0123456789abcdef";

View File

@ -224,7 +224,7 @@ int main(int argc, char *argv[])
printf(" Free pages: %zu %.1f%%\n", value, value / percent);
value = pages - reclaimable;
printf(" Reading: %zu %.1f%%\n", value, value / percent);
printf(" Detained: %zu %.1f%%\n", value, value / percent);
value = reclaimable;
printf(" Reclaimable: %zu %.1f%%\n", value, value / percent);

5
midl.c
View File

@ -28,7 +28,6 @@
/** @defgroup idls ID List Management
* @{
*/
#define CMP(x,y) ( (x) < (y) ? -1 : (x) > (y) )
static unsigned __hot
mdb_midl_search( MDB_IDL ids, MDB_ID id )
@ -46,7 +45,7 @@ mdb_midl_search( MDB_IDL ids, MDB_ID id )
while( 0 < n ) {
unsigned pivot = n >> 1;
cursor = base + pivot + 1;
val = CMP( ids[cursor], id );
val = mdbx_cmp2int( ids[cursor], id );
if( val < 0 ) {
n = pivot;
@ -295,7 +294,7 @@ mdb_mid2l_search( MDB_ID2L ids, MDB_ID id )
while( 0 < n ) {
unsigned pivot = n >> 1;
cursor = base + pivot + 1;
val = CMP( id, ids[cursor].mid );
val = mdbx_cmp2int( id, ids[cursor].mid );
if( val < 0 ) {
n = pivot;

View File

@ -44,6 +44,8 @@ int main(int argc,char * argv[])
int env_oflags;
struct stat db_stat, exe_stat;
(void) argc;
(void) argv;
srand(time(NULL));
count = (rand()%384) + 64;

View File

@ -52,6 +52,8 @@ int main(int argc,char * argv[])
int env_oflags;
struct stat db_stat, exe_stat;
(void) argc;
(void) argv;
srand(time(NULL));
count = (rand()%384) + 64;
@ -118,22 +120,21 @@ int main(int argc,char * argv[])
mdb_txn_abort(txn);
mdb_env_sync(env, 1);
j=0;
int deleted = 0;
key.mv_data = sval;
for (i= count - 1; i > -1; i-= (rand()%5)) {
j++;
for (i = count - 1; i > -1; i -= (rand()%5)) {
txn=NULL;
E(mdb_txn_begin(env, NULL, 0, &txn));
sprintf(sval, "%03x ", values[i]);
if (RES(MDB_NOTFOUND, mdb_del(txn, dbi, &key, NULL))) {
j--;
mdb_txn_abort(txn);
} else {
E(mdb_txn_commit(txn));
deleted++;
}
}
free(values);
printf("Deleted %d values\n", j);
printf("Deleted %d values\n", deleted);
printf("check-preset-b.cursor-next\n");
E(mdb_env_stat(env, &mst));
@ -147,7 +148,7 @@ int main(int argc,char * argv[])
++present_b;
}
CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get");
CHECK(present_b == present_a - j, "mismatch");
CHECK(present_b == present_a - deleted, "mismatch");
printf("check-preset-b.cursor-prev\n");
j = 1;
@ -182,7 +183,8 @@ int main(int argc,char * argv[])
++present_c;
}
CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get");
CHECK(present_c == present_a, "mismatch");
printf("Rolled back %d deletion(s)\n", present_c - (present_a - deleted));
CHECK(present_c > present_a - deleted, "mismatch");
printf("check-preset-d.cursor-prev\n");
j = 1;

View File

@ -46,6 +46,8 @@ int main(int argc,char * argv[])
int env_oflags;
struct stat db_stat, exe_stat;
(void) argc;
(void) argv;
srand(time(NULL));
count = (rand()%384) + 64;

View File

@ -47,6 +47,8 @@ int main(int argc,char * argv[])
int env_oflags;
struct stat db_stat, exe_stat;
(void) argc;
(void) argv;
srand(time(NULL));
memset(sval, 0, sizeof(sval));

View File

@ -47,6 +47,8 @@ int main(int argc,char * argv[])
int env_oflags;
struct stat db_stat, exe_stat;
(void) argc;
(void) argv;
memset(sval, 0, sizeof(sval));
count = 510;

View File

@ -47,6 +47,8 @@ int main(int argc,char * argv[])
int env_oflags;
struct stat db_stat, exe_stat;
(void) argc;
(void) argv;
srand(time(NULL));
memset(sval, 0, sizeof(sval));

View File

@ -47,6 +47,8 @@ int main(int argc,char * argv[])
int env_oflags;
struct stat db_stat, exe_stat;
(void) argc;
(void) argv;
srand(time(NULL));
E(mdb_env_create(&env));

View File

@ -70,6 +70,9 @@
#ifndef __hot
# if defined(NDEBUG) && (defined(__GNUC__) && !defined(__clang__))
# define __hot __attribute__((hot, optimize("O3")))
# elif defined(__GNUC__)
/* clang case: just put frequently used functions in a separate section */
# define __hot __attribute__((section("text.hot")))
# else
# define __hot
# endif
@ -80,7 +83,7 @@
# define __cold __attribute__((cold, optimize("Os")))
# elif defined(__GNUC__)
/* clang case: just put infrequently used functions in a separate section */
# define __cold __attribute__((section("text.cold")))
# define __cold __attribute__((section("text.unlikely")))
# else
# define __cold
# endif

View File

@ -205,6 +205,9 @@ static void wbench(int flags, int mb, int count, int salt)
int main(int argc,char * argv[])
{
(void) argc;
(void) argv;
#define SALT 1
#define COUNT 10000
#define SIZE 12

View File

@ -10,7 +10,7 @@
#include <assert.h>
#include "mdbx.h"
/* Expands to FOUR comma-separated int arguments (the octets of a host-order
 * IPv4 address, most significant first) for use with a "%d.%d.%d.%d" format.
 * Every octet, including the top one, is masked to 8 bits.
 * NOTE: 'addr' is evaluated four times, and the expansion cannot be wrapped
 * in parentheses because it is an argument list, not a single expression. */
#define IP_PRINTF_ARG_HOST(addr) (int)((addr) >> 24 & 0xff), (int)((addr) >> 16 & 0xff), (int)((addr) >> 8 & 0xff), (int)((addr) & 0xff)
char opt_db_path[PATH_MAX] = "/dev/shm/lmdb_bench1";
static MDB_env *env;
@ -23,74 +23,74 @@ int64_t lmdb_del = 0;
int64_t obj_id = 0;
/* Append one id to the 'ids' pool and bump 'ids_count'.
 * Each call stores the id exactly once.
 * NOTE(review): no capacity check is done here; the size of 'ids' is not
 * visible from this block, so callers must not overfill it. */
static void add_id_to_pool(int64_t id) {
    ids[ids_count] = id;
    ids_count++;
}
/* Current wall-clock time from gettimeofday(), as microseconds in a
 * 64-bit integer (seconds * 1000000 + microseconds). */
static inline int64_t getTimeMicroseconds(void) {
    struct timeval val;
    gettimeofday(&val, NULL);
    /* the int64_t factor keeps the multiplication in 64 bits */
    return val.tv_sec * ((int64_t) 1000000) + val.tv_usec;
}
/* Remove and return a randomly chosen id from the 'ids' pool.
 * The freed slot is filled with the last element, so the order of the pool
 * is not preserved.
 * Returns -1 when the pool is empty. */
static int64_t get_id_from_pool(void) {
    if (ids_count == 0) {
        return -1;
    }
    int32_t index = rand() % ids_count;
    int64_t id = ids[index];
    /* swap-remove: move the last id into the vacated slot */
    ids[index] = ids[ids_count - 1];
    ids_count--;
    return id;
}
/* Evaluate the expression x once; if the result is not MDB_SUCCESS, print
 * the code, its mdb_strerror() text, the expression and the source location,
 * then terminate the process with EXIT_FAILURE.
 * Wrapped in do { } while(0) so it behaves as a single statement.
 * NOTE: the macro declares a local named 'rc'; do not pass an expression
 * that itself refers to a variable called 'rc'. */
#define LMDB_CHECK(x) \
    do { \
        const int rc = (x); \
        if (rc != MDB_SUCCESS) { \
            printf("Error [%d] %s in %s at %s:%d\n", rc, mdb_strerror(rc), #x, __FILE__, __LINE__); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
/* Create the global environment 'env', configure it (3 GiB map, up to 30
 * named databases) and open it at 'opt_db_path'.
 * Any failure terminates the process via LMDB_CHECK. */
static void db_connect(void) {
    LMDB_CHECK(mdb_env_create(&env));
    /* Computed in size_t (the parameter type of mdb_env_set_mapsize), so the
     * product does not overflow where 'long' is 32 bits wide. */
    LMDB_CHECK(mdb_env_set_mapsize(env, (size_t) 3 * 1024 * 1024 * 1024));
    LMDB_CHECK(mdb_env_set_maxdbs(env, 30));
#if defined(MDB_LIFORECLAIM)
    LMDB_CHECK(mdb_env_open(env, opt_db_path, MDB_CREATE | MDB_NOSYNC | MDB_WRITEMAP | MDB_LIFORECLAIM, 0664));
#else
    LMDB_CHECK(mdb_env_open(env, opt_db_path, MDB_CREATE | MDB_NOSYNC | MDB_WRITEMAP, 0664));
#endif
    printf("Connection open\n");
}
/* Payload stored per session record. The three text fields hold
 * NUL-terminated strings; 'fill' is trailing padding of which a variable
 * part is written, so it must remain the last member. */
typedef struct {
    char session_id1[100];
    char session_id2[100];
    char ip[20];
    uint8_t fill[100];
} session_data_t;
/* Key of an event record: object id followed by a one-byte event type.
 * Packed, so the struct occupies exactly 9 bytes without tail padding. */
typedef struct {
    int64_t obj_id;
    int8_t event_type;
} __attribute__((__packed__)) event_data_t;
static void create_record(int64_t record_id) {
MDB_dbi dbi_session;
MDB_dbi dbi_session_id;
MDB_dbi dbi_event;
MDB_dbi dbi_ip;
event_data_t event;
MDB_txn *txn;
session_data_t data;
// transaction init
snprintf(data.session_id1, sizeof (data.session_id1), "mskugw%02ld_%02ld.gx.yota.ru;3800464060;4152;%ld", record_id % 3 + 1, record_id % 9 + 1, record_id);
snprintf(data.session_id2, sizeof (data.session_id2), "gx_service;%ld;%ld;node@spb-jsm1", record_id, record_id % 1000000000 + 99999);
snprintf(data.ip, sizeof (data.ip), "%d.%d.%d.%d", IP_PRINTF_ARG_HOST(record_id & 0xFFFFFFFF));
event.obj_id = record_id;
event.event_type = 1;
MDB_dbi dbi_session;
MDB_dbi dbi_session_id;
MDB_dbi dbi_event;
MDB_dbi dbi_ip;
event_data_t event;
MDB_txn *txn;
session_data_t data;
// transaction init
snprintf(data.session_id1, sizeof (data.session_id1), "mskugw%02ld_%02ld.gx.yota.ru;3800464060;4152;%ld", record_id % 3 + 1, record_id % 9 + 1, record_id);
snprintf(data.session_id2, sizeof (data.session_id2), "gx_service;%ld;%ld;node@spb-jsm1", record_id, record_id % 1000000000 + 99999);
snprintf(data.ip, sizeof (data.ip), "%d.%d.%d.%d", IP_PRINTF_ARG_HOST(record_id & 0xFFFFFFFF));
event.obj_id = record_id;
event.event_type = 1;
MDB_val _session_id1_rec = {data.session_id1, strlen(data.session_id1)};
MDB_val _session_id2_rec = {data.session_id2, strlen(data.session_id2)};
@ -99,139 +99,142 @@ static void create_record(int64_t record_id) {
MDB_val _data_rec = {&data, offsetof(session_data_t, fill) + (rand() % sizeof (data.fill))};
MDB_val _event_rec = {&event, sizeof (event)};
LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn));
LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session));
LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id));
LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event));
LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip));
LMDB_CHECK(mdb_put(txn, dbi_session, &_obj_id_rec, &_data_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_session_id, &_session_id1_rec, &_obj_id_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_session_id, &_session_id2_rec, &_obj_id_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_ip, &_ip_rec, &_obj_id_rec, 0));
LMDB_CHECK(mdb_put(txn, dbi_event, &_event_rec, &_obj_id_rec, 0));
LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn));
LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session));
LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id));
LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event));
LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip));
LMDB_CHECK(mdb_put(txn, dbi_session, &_obj_id_rec, &_data_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_session_id, &_session_id1_rec, &_obj_id_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_session_id, &_session_id2_rec, &_obj_id_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_ip, &_ip_rec, &_obj_id_rec, 0));
LMDB_CHECK(mdb_put(txn, dbi_event, &_event_rec, &_obj_id_rec, 0));
// transaction commit
LMDB_CHECK(mdb_txn_commit(txn));
lmdb_add++;
// transaction commit
LMDB_CHECK(mdb_txn_commit(txn));
lmdb_add++;
}
static void delete_record(int64_t record_id) {
MDB_dbi dbi_session;
MDB_dbi dbi_session_id;
MDB_dbi dbi_event;
MDB_dbi dbi_ip;
event_data_t event;
MDB_txn *txn;
MDB_dbi dbi_session;
MDB_dbi dbi_session_id;
MDB_dbi dbi_event;
MDB_dbi dbi_ip;
event_data_t event;
MDB_txn *txn;
// transaction init
LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn));
// open database in read-write mode
LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session));
LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id));
LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event));
LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip));
// put data
// transaction init
LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn));
// open database in read-write mode
LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session));
LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id));
LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event));
LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip));
// put data
MDB_val _obj_id_rec = {&record_id, sizeof(record_id)};
MDB_val v_rec;
// get data
LMDB_CHECK(mdb_get(txn, dbi_session, &_obj_id_rec, &v_rec));
session_data_t* data = (session_data_t*) v_rec.mv_data;
MDB_val v_rec;
// get data
LMDB_CHECK(mdb_get(txn, dbi_session, &_obj_id_rec, &v_rec));
session_data_t* data = (session_data_t*) v_rec.mv_data;
MDB_val _session_id1_rec = {data->session_id1, strlen(data->session_id1)};
MDB_val _session_id2_rec = {data->session_id2, strlen(data->session_id2)};
MDB_val _ip_rec = {data->ip, strlen(data->ip)};
LMDB_CHECK(mdb_del(txn, dbi_session_id, &_session_id1_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_session_id, &_session_id2_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_ip, &_ip_rec, NULL));
event.obj_id = record_id;
event.event_type = 1;
LMDB_CHECK(mdb_del(txn, dbi_session_id, &_session_id1_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_session_id, &_session_id2_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_ip, &_ip_rec, NULL));
event.obj_id = record_id;
event.event_type = 1;
MDB_val _event_rec = {&event, sizeof(event)};
LMDB_CHECK(mdb_del(txn, dbi_event, &_event_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_session, &_obj_id_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_event, &_event_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_session, &_obj_id_rec, NULL));
// transaction commit
LMDB_CHECK(mdb_txn_commit(txn));
lmdb_del++;
// transaction commit
LMDB_CHECK(mdb_txn_commit(txn));
lmdb_del++;
}
// Close the global environment exactly once and report it on stdout.
static void db_disconnect() {
    mdb_env_close(env);
    // the handle is dead after mdb_env_close(); do not leave it dangling
    env = NULL;
    printf("Connection closed\n");
}
// Print one row of page statistics for the named sub-database and add its
// branch and leaf page counts to the caller's running totals.
//
// db               name passed to mdb_dbi_open()
// ms_branch_pages  in/out accumulator, increased by stat.ms_branch_pages
// ms_leaf_pages    in/out accumulator, increased by stat.ms_leaf_pages
static void get_db_stat(const char* db, int64_t* ms_branch_pages, int64_t* ms_leaf_pages) {
    MDB_txn *txn;
    MDB_stat stat;
    MDB_dbi dbi;

    LMDB_CHECK(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));
    // MDB_CREATE is not allowed in a read-only transaction; just open the table
    LMDB_CHECK(mdb_dbi_open(txn, db, 0, &dbi));
    LMDB_CHECK(mdb_stat(txn, dbi, &stat));
    // stat is a by-value snapshot, so the read transaction can end before printing
    mdb_txn_abort(txn);

    printf("%15s | %15ld | %5u | %10ld | %10ld | %11ld |\n",
           db,
           stat.ms_branch_pages,
           stat.ms_depth,
           stat.ms_entries,
           stat.ms_leaf_pages,
           stat.ms_overflow_pages);
    (*ms_branch_pages) += stat.ms_branch_pages;
    (*ms_leaf_pages) += stat.ms_leaf_pages;
}
// Print the per-table page statistics, a totals row, and (from the second call
// onwards) the add/delete rate since the previous call.
//
// The previous counters and timestamp are kept in function-local statics; the
// first call only records them.
static void periodic_stat(void) {
    static int64_t prev_add;
    static int64_t prev_del;
    static int64_t t = -1;
    int64_t ms_branch_pages = 0;
    int64_t ms_leaf_pages = 0;

    printf(" Name | ms_branch_pages | depth | entries | leaf_pages | overf_pages |\n");
    get_db_stat("session", &ms_branch_pages, &ms_leaf_pages);
    get_db_stat("session_id", &ms_branch_pages, &ms_leaf_pages);
    get_db_stat("event", &ms_branch_pages, &ms_leaf_pages);
    get_db_stat("ip", &ms_branch_pages, &ms_leaf_pages);
    printf("%15s | %15ld | %5s | %10s | %10ld | %11s |\n", "", ms_branch_pages, "", "", ms_leaf_pages, "");

    if (t > 0) {
        int64_t delta = getTimeMicroseconds() - t;
        // skip the rate line when no time has elapsed (avoids dividing by zero)
        if (delta > 0) {
            printf("CPS: add %ld, delete %ld, items processed - %ld\n", (lmdb_add - prev_add)*1000000 / delta, (lmdb_del - prev_del)*1000000 / delta, obj_id);
        }
    }
    t = getTimeMicroseconds();
    prev_add = lmdb_add;
    prev_del = lmdb_del;
}
// Run one batch of 10000 steps, then print statistics.
//
// Each step creates a new record while the pool holds at most REC_COUNT ids,
// and deletes a randomly chosen record once the pool exceeds REC_COUNT.
static void periodic_add_rec() {
    int i;

    for (i = 0; i < 10000; i++) {
        if (ids_count <= REC_COUNT) {
            int64_t id = obj_id++;
            create_record(id);
            add_id_to_pool(id);
        }
        // re-checked on purpose: the insert above may have pushed the pool over the limit
        if (ids_count > REC_COUNT) {
            int64_t id = get_id_from_pool();
            delete_record(id);
        }
    }
    periodic_stat();
}
// Benchmark entry point: recreate the database directory contents, open the
// environment and run add/delete batches forever.
//
// Command-line arguments are ignored.  Returns EXIT_FAILURE only when the
// database path does not fit into PATH_MAX; otherwise the loop never ends.
int main(int argc, char** argv) {
    static const char *const stale_files[] = {"/data.mdb", "/lock.mdb"};
    char filename[PATH_MAX];
    size_t k;

    (void) argc;
    (void) argv;

    // the result is ignored on purpose: the directory usually exists already
    mkdir(opt_db_path, 0775);

    // start from an empty database: drop files left over from a previous run
    for (k = 0; k < sizeof(stale_files) / sizeof(stale_files[0]); k++) {
        int len = snprintf(filename, sizeof(filename), "%s%s", opt_db_path, stale_files[k]);
        if (len < 0 || (size_t) len >= sizeof(filename)) {
            printf("Database path is too long: %s\n", opt_db_path);
            return EXIT_FAILURE;
        }
        remove(filename);
    }

    db_connect();
    while (1) {
        periodic_add_rec();
    }
    // not reached; kept so the shutdown sequence is visible if the loop ever gets an exit
    db_disconnect();
    return 0;
}

View File

@ -10,7 +10,7 @@
#include <assert.h>
#include "mdbx.h"
// Expands to four comma-separated int arguments (most significant byte first)
// for a "%d.%d.%d.%d" format.  The top byte is not masked, so callers pass a
// value that already fits in 32 bits.
#define IP_PRINTF_ARG_HOST(addr) \
    (int)((addr) >> 24), \
    (int)(((addr) >> 16) & 0xff), \
    (int)(((addr) >> 8) & 0xff), \
    (int)((addr) & 0xff)
char opt_db_path[PATH_MAX] = "/dev/shm/lmdb_bench2";
static MDB_env *env;
@ -25,90 +25,90 @@ int64_t lmdb_data_size = 0;
int64_t lmdb_key_size = 0;
// Append id to the pool of live record ids (exactly once per call).
// NOTE(review): there is no capacity check here; the size of ids[] is declared
// outside this view, so confirm it covers the largest pool the callers build.
static void add_id_to_pool(int64_t id) {
    ids[ids_count] = id;
    ids_count++;
}
// Return the current gettimeofday() time as a single count of microseconds.
static inline int64_t getTimeMicroseconds(void) {
    struct timeval val;

    gettimeofday(&val, NULL);
    // widen before multiplying so the seconds part cannot overflow a 32-bit time_t product
    return val.tv_sec * ((int64_t) 1000000) + val.tv_usec;
}
// Remove and return a randomly chosen id from the pool, or -1 when the pool is
// empty.  The vacated slot is filled with the last element, so the order of
// the pool is not preserved.
static int64_t get_id_from_pool() {
    if (ids_count == 0) {
        return -1;
    }

    int32_t index = rand() % ids_count;
    int64_t id = ids[index];

    // swap-remove: overwrite the picked slot with the tail, then shrink
    ids[index] = ids[ids_count - 1];
    ids_count--;
    return id;
}
// Evaluate x exactly once; if the result is not MDB_SUCCESS, print the code,
// its mdb_strerror() text, the failing expression and its location, then
// terminate the process with EXIT_FAILURE.  Wrapped in do/while(0) so it
// behaves as a single statement.
#define LMDB_CHECK(x) \
    do {\
        const int rc = (x);\
        if ( rc != MDB_SUCCESS ) {\
            printf("Error [%d] %s in %s at %s:%d\n", rc, mdb_strerror(rc), #x, __FILE__, __LINE__); \
            exit(EXIT_FAILURE); \
        }\
    } while(0)
// Create and open the global environment at opt_db_path and make sure the four
// named tables ("session", "session_id", "event", "ip") exist.
//
// The environment is opened with MDB_NOSYNC | MDB_WRITEMAP, plus
// MDB_LIFORECLAIM when the header defines it.  Any failure terminates the
// process via LMDB_CHECK.
static void db_connect(void) {
    MDB_dbi dbi_session;
    MDB_dbi dbi_session_id;
    MDB_dbi dbi_event;
    MDB_dbi dbi_ip;
    MDB_txn *txn;
    unsigned env_flags = MDB_CREATE | MDB_NOSYNC | MDB_WRITEMAP;

#if defined(MDB_LIFORECLAIM)
    env_flags |= MDB_LIFORECLAIM;
#endif

    LMDB_CHECK(mdb_env_create(&env));
    LMDB_CHECK(mdb_env_set_mapsize(env, 300000L * 4096L));
    LMDB_CHECK(mdb_env_set_maxdbs(env, 30));
    LMDB_CHECK(mdb_env_open(env, opt_db_path, env_flags, 0664));

    // transaction init
    LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn));
    // open database in read-write mode
    LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session));
    LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id));
    LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event));
    LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip));
    // transaction commit
    LMDB_CHECK(mdb_txn_commit(txn));
    printf("Connection open\n");
}
// Value stored in the "session" table.  Writers store everything up to `fill`
// plus a random-length prefix of `fill`, so a stored row may be shorter than
// sizeof(session_data_t).
typedef struct {
    char session_id1[100]; // NUL-terminated; also a key in "session_id"
    char session_id2[100]; // NUL-terminated; also a key in "session_id"
    char ip[20];           // NUL-terminated dotted quad; key in "ip"
    uint8_t fill[100];     // padding used to vary the stored row size
} session_data_t;
// Key stored in the "event" table.  Packed so the key bytes carry no padding.
typedef struct {
    int64_t obj_id;
    int8_t event_type;
} __attribute__((__packed__)) event_data_t;
// Insert one synthetic session identified by record_id.
//
// Formats two session-id strings and a dotted-quad ip from record_id, then, in
// one write transaction, puts the session row plus index entries into the
// "session", "session_id", "ip" and "event" tables, updates the
// lmdb_data_size / lmdb_key_size counters, commits and increments lmdb_add.
// Any non-success return code ends the process via LMDB_CHECK.
//
// NOTE(review): this text comes from a diff view. Most declarations and
// statements below appear twice (an old and a new copy of the same line), and
// the line starting with "@" is a hunk header, not C. The definitions of
// _ip_rec and _obj_id_rec fall in the part that header elides, so they are not
// visible here. Deduplicate against the real file before compiling.
static void create_record(int64_t record_id) {
MDB_dbi dbi_session;
MDB_dbi dbi_session_id;
MDB_dbi dbi_event;
MDB_dbi dbi_ip;
event_data_t event;
MDB_txn *txn;
session_data_t data;
// build the record fields and the event key from record_id
snprintf(data.session_id1, sizeof (data.session_id1), "mskugw%02ld_%02ld.gx.yota.ru;3800464060;4152;%ld", record_id % 3 + 1, record_id % 9 + 1, record_id);
snprintf(data.session_id2, sizeof (data.session_id2), "gx_service;%ld;%ld;node@spb-jsm1", record_id, record_id % 1000000000 + 99999);
snprintf(data.ip, sizeof (data.ip), "%d.%d.%d.%d", IP_PRINTF_ARG_HOST(record_id & 0xFFFFFFFF));
event.obj_id = record_id;
event.event_type = 1;
MDB_dbi dbi_session;
MDB_dbi dbi_session_id;
MDB_dbi dbi_event;
MDB_dbi dbi_ip;
event_data_t event;
MDB_txn *txn;
session_data_t data;
// build the record fields and the event key from record_id (second copy)
snprintf(data.session_id1, sizeof (data.session_id1), "mskugw%02ld_%02ld.gx.yota.ru;3800464060;4152;%ld", record_id % 3 + 1, record_id % 9 + 1, record_id);
snprintf(data.session_id2, sizeof (data.session_id2), "gx_service;%ld;%ld;node@spb-jsm1", record_id, record_id % 1000000000 + 99999);
snprintf(data.ip, sizeof (data.ip), "%d.%d.%d.%d", IP_PRINTF_ARG_HOST(record_id & 0xFFFFFFFF));
event.obj_id = record_id;
event.event_type = 1;
// string keys use strlen(), so the terminating NUL is not part of the key
MDB_val _session_id1_rec = {data.session_id1, strlen(data.session_id1)};
MDB_val _session_id2_rec = {data.session_id2, strlen(data.session_id2)};
@ -117,171 +117,174 @@ static void create_record(int64_t record_id) {
// stored row = fixed fields plus a random-length prefix of data.fill
MDB_val _data_rec = {&data, offsetof(session_data_t, fill) + (rand() % sizeof (data.fill))};
MDB_val _event_rec = {&event, sizeof(event)};
// write transaction: open the four tables, then put the row and its index entries
LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn));
LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session));
LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id));
LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event));
LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip));
LMDB_CHECK(mdb_put(txn, dbi_session, &_obj_id_rec, &_data_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_session_id, &_session_id1_rec, &_obj_id_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_session_id, &_session_id2_rec, &_obj_id_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_ip, &_ip_rec, &_obj_id_rec, 0));
LMDB_CHECK(mdb_put(txn, dbi_event, &_event_rec, &_obj_id_rec, 0));
// payload accounting: one session row plus four obj_id values; keys are summed separately
lmdb_data_size += (_data_rec.mv_size + _obj_id_rec.mv_size * 4);
lmdb_key_size += (_obj_id_rec.mv_size + _session_id1_rec.mv_size + _session_id2_rec.mv_size + _ip_rec.mv_size + _event_rec.mv_size);
LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn));
LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session));
LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id));
LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event));
LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip));
LMDB_CHECK(mdb_put(txn, dbi_session, &_obj_id_rec, &_data_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_session_id, &_session_id1_rec, &_obj_id_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_session_id, &_session_id2_rec, &_obj_id_rec, MDB_NOOVERWRITE | MDB_NODUPDATA));
LMDB_CHECK(mdb_put(txn, dbi_ip, &_ip_rec, &_obj_id_rec, 0));
LMDB_CHECK(mdb_put(txn, dbi_event, &_event_rec, &_obj_id_rec, 0));
lmdb_data_size += (_data_rec.mv_size + _obj_id_rec.mv_size * 4);
lmdb_key_size += (_obj_id_rec.mv_size + _session_id1_rec.mv_size + _session_id2_rec.mv_size + _ip_rec.mv_size + _event_rec.mv_size);
// transaction commit
LMDB_CHECK(mdb_txn_commit(txn));
lmdb_add++;
// transaction commit
LMDB_CHECK(mdb_txn_commit(txn));
lmdb_add++;
}
static void delete_record(int64_t record_id) {
MDB_dbi dbi_session;
MDB_dbi dbi_session_id;
MDB_dbi dbi_event;
MDB_dbi dbi_ip;
event_data_t event;
MDB_txn *txn;
MDB_dbi dbi_session;
MDB_dbi dbi_session_id;
MDB_dbi dbi_event;
MDB_dbi dbi_ip;
event_data_t event;
MDB_txn *txn;
// transaction init
LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn));
// open database in read-write mode
LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session));
LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id));
LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event));
LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip));
// put data
// transaction init
LMDB_CHECK(mdb_txn_begin(env, NULL, 0, &txn));
// open database in read-write mode
LMDB_CHECK(mdb_dbi_open(txn, "session", MDB_CREATE, &dbi_session));
LMDB_CHECK(mdb_dbi_open(txn, "session_id", MDB_CREATE, &dbi_session_id));
LMDB_CHECK(mdb_dbi_open(txn, "event", MDB_CREATE, &dbi_event));
LMDB_CHECK(mdb_dbi_open(txn, "ip", MDB_CREATE, &dbi_ip));
// put data
MDB_val _obj_id_rec = {&record_id, sizeof(record_id)};
MDB_val _data_rec;
// get data
LMDB_CHECK(mdb_get(txn, dbi_session, &_obj_id_rec, &_data_rec));
session_data_t* data = (session_data_t*) _data_rec.mv_data;
MDB_val _data_rec;
// get data
LMDB_CHECK(mdb_get(txn, dbi_session, &_obj_id_rec, &_data_rec));
session_data_t* data = (session_data_t*) _data_rec.mv_data;
MDB_val _session_id1_rec = {data->session_id1, strlen(data->session_id1)};
MDB_val _session_id2_rec = {data->session_id2, strlen(data->session_id2)};
MDB_val _ip_rec = {data->ip, strlen(data->ip)};
LMDB_CHECK(mdb_del(txn, dbi_session_id, &_session_id1_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_session_id, &_session_id2_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_ip, &_ip_rec, NULL));
event.obj_id = record_id;
event.event_type = 1;
LMDB_CHECK(mdb_del(txn, dbi_session_id, &_session_id1_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_session_id, &_session_id2_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_ip, &_ip_rec, NULL));
event.obj_id = record_id;
event.event_type = 1;
MDB_val _event_rec = {&event, sizeof(event)};
LMDB_CHECK(mdb_del(txn, dbi_event, &_event_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_session, &_obj_id_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_event, &_event_rec, NULL));
LMDB_CHECK(mdb_del(txn, dbi_session, &_obj_id_rec, NULL));
lmdb_data_size -= (_data_rec.mv_size + _obj_id_rec.mv_size * 4);
lmdb_key_size -= (_obj_id_rec.mv_size + _session_id1_rec.mv_size + _session_id2_rec.mv_size + _ip_rec.mv_size + _event_rec.mv_size);
lmdb_data_size -= (_data_rec.mv_size + _obj_id_rec.mv_size * 4);
lmdb_key_size -= (_obj_id_rec.mv_size + _session_id1_rec.mv_size + _session_id2_rec.mv_size + _ip_rec.mv_size + _event_rec.mv_size);
// transaction commit
LMDB_CHECK(mdb_txn_commit(txn));
lmdb_del++;
// transaction commit
LMDB_CHECK(mdb_txn_commit(txn));
lmdb_del++;
}
// Close the global environment exactly once and report it on stdout.
static void db_disconnect() {
    mdb_env_close(env);
    // the handle is dead after mdb_env_close(); do not leave it dangling
    env = NULL;
    printf("Connection closed\n");
}
// Print one row of page statistics for the named sub-database and add its
// branch and leaf page counts to the caller's running totals.
//
// db               name passed to mdb_dbi_open()
// ms_branch_pages  in/out accumulator, increased by stat.ms_branch_pages
// ms_leaf_pages    in/out accumulator, increased by stat.ms_leaf_pages
static void get_db_stat(const char* db, int64_t* ms_branch_pages, int64_t* ms_leaf_pages) {
    MDB_txn *txn;
    MDB_stat stat;
    MDB_dbi dbi;

    LMDB_CHECK(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));
    // MDB_CREATE is not allowed in a read-only transaction; just open the table
    LMDB_CHECK(mdb_dbi_open(txn, db, 0, &dbi));
    LMDB_CHECK(mdb_stat(txn, dbi, &stat));
    // stat is a by-value snapshot, so the read transaction can end before printing
    mdb_txn_abort(txn);

    printf("%15s | %15ld | %5u | %10ld | %10ld | %11ld |\n",
           db,
           stat.ms_branch_pages,
           stat.ms_depth,
           stat.ms_entries,
           stat.ms_leaf_pages,
           stat.ms_overflow_pages);
    (*ms_branch_pages) += stat.ms_branch_pages;
    (*ms_leaf_pages) += stat.ms_leaf_pages;
}
// Print the per-table page statistics, a totals row, and (from the second call
// onwards) the add/delete rate since the previous call plus the share of leaf
// and branch page space occupied by the tracked key and data bytes.
//
// The previous counters and timestamp are kept in function-local statics; the
// first call only records them.
static void periodic_stat(void) {
    static int64_t prev_add;
    static int64_t prev_del;
    static int64_t t = -1;
    int64_t ms_branch_pages = 0;
    int64_t ms_leaf_pages = 0;

    printf(" Name | ms_branch_pages | depth | entries | leaf_pages | overf_pages |\n");
    get_db_stat("session", &ms_branch_pages, &ms_leaf_pages);
    get_db_stat("session_id", &ms_branch_pages, &ms_leaf_pages);
    get_db_stat("event", &ms_branch_pages, &ms_leaf_pages);
    get_db_stat("ip", &ms_branch_pages, &ms_leaf_pages);
    printf("%15s | %15ld | %5s | %10s | %10ld | %11s |\n", "", ms_branch_pages, "", "", ms_leaf_pages, "");

    if (t > 0) {
        int64_t delta = getTimeMicroseconds() - t;
        int64_t pages = ms_leaf_pages + ms_branch_pages;

        // skip the rate line when no time has elapsed (avoids dividing by zero)
        if (delta > 0) {
            printf("CPS: add %ld, delete %ld, items processed - %ldK data=%ldK key=%ldK\n", (lmdb_add - prev_add)*1000000 / delta, (lmdb_del - prev_del)*1000000 / delta, obj_id / 1024, lmdb_data_size / 1024, lmdb_key_size / 1024);
        }
        // an empty database has no pages to relate the payload to
        if (pages > 0) {
            printf("usage data=%ld%%\n", ((lmdb_data_size + lmdb_key_size) * 100) / (pages*4096));
        }
    }
    t = getTimeMicroseconds();
    prev_add = lmdb_add;
    prev_del = lmdb_del;
}
//static void periodic_add_rec() {
// for (int i = 0; i < 10240; i++) {
// if (ids_count <= REC_COUNT) {
// int64_t id = obj_id++;
// create_record(id);
// add_id_to_pool(id);
// }
// if (ids_count > REC_COUNT) {
// int64_t id = get_id_from_pool();
// delete_record(id);
// }
// }
// periodic_stat();
// for (int i = 0; i < 10240; i++) {
// if (ids_count <= REC_COUNT) {
// int64_t id = obj_id++;
// create_record(id);
// add_id_to_pool(id);
// }
// if (ids_count > REC_COUNT) {
// int64_t id = get_id_from_pool();
// delete_record(id);
// }
// }
// periodic_stat();
//}
// Benchmark entry point: recreate the database directory contents, open the
// environment, pre-fill it with 1024000 records, then keep replacing records
// (one create plus one random delete per step) while printing statistics at
// most once per 100000 microseconds, checked after every 100 steps.
//
// Command-line arguments are ignored.  Returns EXIT_FAILURE only when the
// database path does not fit into PATH_MAX; otherwise the loop never ends.
int main(int argc, char** argv) {
    static const char *const stale_files[] = {"/data.mdb", "/lock.mdb"};
    char filename[PATH_MAX];
    size_t k;
    int i;
    int64_t t;

    (void) argc;
    (void) argv;

    // the result is ignored on purpose: the directory usually exists already
    mkdir(opt_db_path, 0775);

    // start from an empty database: drop files left over from a previous run
    for (k = 0; k < sizeof(stale_files) / sizeof(stale_files[0]); k++) {
        int len = snprintf(filename, sizeof(filename), "%s%s", opt_db_path, stale_files[k]);
        if (len < 0 || (size_t) len >= sizeof(filename)) {
            printf("Database path is too long: %s\n", opt_db_path);
            return EXIT_FAILURE;
        }
        remove(filename);
    }

    db_connect();
    periodic_stat();

    // warm-up: fill the database before the steady-state phase
    for (i = 0; i < 1024000; i++) {
        int64_t id = obj_id++;
        create_record(id);
        add_id_to_pool(id);
    }
    periodic_stat();

    t = getTimeMicroseconds();
    while (1) {
        int64_t now;

        // steady state: the record count stays constant, every insert is paired with a delete
        for (i = 0; i < 100; i++) {
            int64_t id = obj_id++;
            create_record(id);
            add_id_to_pool(id);
            id = get_id_from_pool();
            delete_record(id);
        }

        now = getTimeMicroseconds();
        if ((now - t) > 100000) {
            periodic_stat();
            t = now;
        }
    }
    // not reached; kept so the shutdown sequence is visible if the loop ever gets an exit
    db_disconnect();
    return 0;
}