mdbx: Merge branch 'nexenta' (preparation to stable stage).

This commit is contained in:
Leo Yuriev 2017-07-04 08:55:48 +03:00
commit d5a0fe539f
6 changed files with 581 additions and 2 deletions

View File

@ -41,7 +41,7 @@ LIBRARIES := libmdbx.a libmdbx.so
TOOLS := mdbx_stat mdbx_copy mdbx_dump mdbx_load mdbx_chk
MANPAGES := mdb_stat.1 mdb_copy.1 mdb_dump.1 mdb_load.1
TESTS := mtest0 mtest1 mtest2 mtest3 mtest4 mtest5 mtest6 wbench \
yota_test1 yota_test2
yota_test1 yota_test2 mtest7 mtest8
SRC_LMDB := mdb.c midl.c lmdb.h midl.h defs.h barriers.h
SRC_MDBX := $(SRC_LMDB) mdbx.c mdbx.h
@ -80,6 +80,8 @@ check: tests
&& echo "*** LMDB-TEST-4" && ./mtest4 && ./mdbx_chk -v testdb \
&& echo "*** LMDB-TEST-5" && ./mtest5 && ./mdbx_chk -v testdb \
&& echo "*** LMDB-TEST-6" && ./mtest6 && ./mdbx_chk -v testdb \
&& echo "*** LMDB-TEST-7" && ./mtest7 && ./mdbx_chk -v testdb \
&& echo "*** LMDB-TEST-8" && ./mtest8 && ./mdbx_chk -v testdb \
&& echo "*** LMDB-TESTs - all done"
libmdbx.a: mdbx.o
@ -130,6 +132,12 @@ mtest5: mtest5.o mdbx.o
mtest6: mtest6.o mdbx.o
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
mtest7: mtest7.o mdbx.o
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
mtest8: mtest8.o mdbx.o
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
yota_test1: yota_test1.o mdbx.o
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^

164
lmdb.h
View File

@ -1671,6 +1671,170 @@ int mdb_reader_check(MDB_env *env, int *dead);
char* mdb_dkey(MDB_val *key, char *buf);
/* attribute support functions for Nexenta ***********************************/
#if MDBX_MODE_ENABLED
typedef uint64_t mdbx_attr_t;
/** @brief Store by cursor with attribute.
*
* This function stores key/data pairs into the database.
* The cursor is positioned at the new item, or on failure usually near it.
* @note Internally based on #MDB_RESERVE feature, therefore doesn't support #MDB_DUPSORT.
* @note Earlier documentation incorrectly said errors would leave the
* state of the cursor unchanged.
* @param[in] cursor A cursor handle returned by #mdb_cursor_open()
* @param[in] key The key operated on.
* @param[in] data The data operated on.
* @param[in] attr The attribute.
* @param[in] flags Options for this operation. This parameter
* must be set to 0 or one of the values described here.
* <ul>
* <li>#MDB_CURRENT - replace the item at the current cursor position.
* The \b key parameter must still be provided, and must match it.
* This is intended to be used when the
* new data is the same size as the old. Otherwise it will simply
* perform a delete of the old record followed by an insert.
* <li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key
* does not already appear in the database. The function will return
* #MDB_KEYEXIST if the key already appears in the database.
* <li>#MDB_RESERVE - reserve space for data of the given size, but
* don't copy the given data. Instead, return a pointer to the
* reserved space, which the caller can fill in later. This saves
* an extra memcpy if the data is being generated later.
* <li>#MDB_APPEND - append the given key/data pair to the end of the
* database. No key comparisons are performed. This option allows
* fast bulk loading when keys are already known to be in the
* correct order. Loading unsorted keys with this flag will cause
* data corruption.
* </ul>
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize().
* <li>#MDB_TXN_FULL - the transaction has too many dirty pages.
* <li>EACCES - an attempt was made to write in a read-only transaction.
* <li>EINVAL - an invalid parameter was specified.
* </ul>
*/
int mdbx_cursor_put_attr(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
mdbx_attr_t attr, unsigned flags);
/** @brief Store items and attributes into a database.
*
* This function stores key/data pairs in the database. The default behavior
* is to enter the new key/data pair, replacing any previously existing key
* if duplicates are disallowed.
* @note Internally based on #MDB_RESERVE feature, therefore doesn't support #MDB_DUPSORT.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[in] key The key to store in the database
* @param[in] attr The attribute to store in the database
* @param[in,out] data The data to store
* @param[in] flags Special options for this operation. This parameter
* must be set to 0 or by bitwise OR'ing together one or more of the
* values described here.
* <ul>
* <li>#MDB_NOOVERWRITE - enter the new key/data pair only if the key
* does not already appear in the database. The function will return
* #MDB_KEYEXIST if the key already appears in the database. The \b data
* parameter will be set to point to the existing item.
* <li>#MDB_RESERVE - reserve space for data of the given size, but
* don't copy the given data. Instead, return a pointer to the
* reserved space, which the caller can fill in later - before
* the next update operation or the transaction ends. This saves
* an extra memcpy if the data is being generated later.
* LMDB does nothing else with this memory, the caller is expected
* to modify all of the space requested.
* <li>#MDB_APPEND - append the given key/data pair to the end of the
* database. This option allows fast bulk loading when keys are
* already known to be in the correct order. Loading unsorted keys
* with this flag will cause a #MDB_KEYEXIST error.
* </ul>
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>#MDB_MAP_FULL - the database is full, see #mdb_env_set_mapsize().
* <li>#MDB_TXN_FULL - the transaction has too many dirty pages.
* <li>EACCES - an attempt was made to write in a read-only transaction.
* <li>EINVAL - an invalid parameter was specified.
* </ul>
*/
int mdbx_put_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data,
mdbx_attr_t attr, unsigned flags);
/** @brief Set items attribute from a database.
*
* This function stores key/data pairs attribute to the database.
* @note Internally based on #MDB_RESERVE feature, therefore doesn't support #MDB_DUPSORT.
*
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[in] key The key to search for in the database
* @param[in] data The data to be stored or NULL to save previous value.
* @param[in] attr The attribute to be stored
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>#MDB_NOTFOUND - the key-value pair was not in the database.
* <li>EINVAL - an invalid parameter was specified.
* </ul>
*/
int mdbx_set_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data,
mdbx_attr_t attr);
/** @brief Get items attribute from a database cursor.
*
* This function retrieves key/data pairs attribute from the database.
* The attribute of the specified key-value pair is returned in
* uint64_t to which \b attrptr refers.
* If the database supports duplicate keys (#MDB_DUPSORT) then both
* key and data parameters are required, otherwise data could be NULL.
*
* @note Values returned from the database are valid only until a
* subsequent update operation, or the end of the transaction.
* @param[in] mc A database cursor pointing at the node
* @param[in] key The key to search for in the database
* @param[in,out] data The data for #MDB_DUPSORT databases
* @param[out] attrptr The pointer to the result
* @param[in] op A cursor operation #MDB_cursor_op
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>#MDB_NOTFOUND - the key-value pair was not in the database.
* <li>EINVAL - an invalid parameter was specified.
* </ul>
*/
int mdbx_cursor_get_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data,
mdbx_attr_t *attrptr, MDB_cursor_op op);
/** @brief Get items attribute from a database.
*
* This function retrieves key/data pairs attribute from the database.
* The attribute of the specified key-value pair is returned in
* uint64_t to which \b attrptr refers.
* If the database supports duplicate keys (#MDB_DUPSORT) then both
* key and data parameters are required, otherwise data is ignored.
*
* @note Values returned from the database are valid only until a
* subsequent update operation, or the end of the transaction.
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[in] dbi A database handle returned by #mdb_dbi_open()
* @param[in] key The key to search for in the database
* @param[in] data The data for #MDB_DUPSORT databases
* @param[out] attrptr The pointer to the result
* @return A non-zero error value on failure and 0 on success. Some possible
* errors are:
* <ul>
* <li>#MDB_NOTFOUND - the key-value pair was not in the database.
* <li>EINVAL - an invalid parameter was specified.
* </ul>
*/
int mdbx_get_attr(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data,
mdbx_attr_t *attrptr);
#endif /* MDBX_MODE_ENABLED */
#ifdef __cplusplus
}
#endif

2
mdb.c
View File

@ -6239,7 +6239,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
/** Set the cursor on a specific data item. */
static int
mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
MDB_cursor_op op, int *exactp)
MDB_cursor_op op, int *exactp)
{
int rc;
MDB_page *mp;

137
mdbx.c
View File

@ -742,3 +742,140 @@ int mdbx_dbi_open_ex(MDB_txn *txn, const char *name, unsigned flags,
}
return rc;
}
/* attribute support functions for Nexenta ***********************************/
static __inline int
mdbx_attr_peek(MDB_val *data, mdbx_attr_t *attrptr)
{
if (unlikely(data->mv_size < sizeof(mdbx_attr_t)))
return MDB_INCOMPATIBLE;
if (likely(attrptr != NULL))
*attrptr = *(mdbx_attr_t*) data->mv_data;
data->mv_size -= sizeof(mdbx_attr_t);
data->mv_data = likely(data->mv_size > 0)
? ((mdbx_attr_t*) data->mv_data) + 1 : NULL;
return MDB_SUCCESS;
}
static __inline int
mdbx_attr_poke(MDB_val *reserved, MDB_val *data, mdbx_attr_t attr, unsigned flags)
{
mdbx_attr_t *space = reserved->mv_data;
if (flags & MDB_RESERVE) {
if (likely(data != NULL)) {
data->mv_data = data->mv_size ? space + 1 : NULL;
}
} else {
*space = attr;
if (likely(data != NULL)) {
memcpy(space + 1, data->mv_data, data->mv_size );
}
}
return MDB_SUCCESS;
}
int
mdbx_cursor_get_attr(MDB_cursor *mc, MDB_val *key, MDB_val *data,
mdbx_attr_t *attrptr, MDB_cursor_op op)
{
int rc = mdbx_cursor_get(mc, key, data, op);
if (unlikely(rc != MDB_SUCCESS))
return rc;
return mdbx_attr_peek(data, attrptr);
}
int
mdbx_get_attr(MDB_txn *txn, MDB_dbi dbi,
MDB_val *key, MDB_val *data, uint64_t *attrptr)
{
int rc = mdbx_get(txn, dbi, key, data);
if (unlikely(rc != MDB_SUCCESS))
return rc;
return mdbx_attr_peek(data, attrptr);
}
int
mdbx_put_attr(MDB_txn *txn, MDB_dbi dbi,
MDB_val *key, MDB_val *data, mdbx_attr_t attr, unsigned flags)
{
MDB_val reserve = {
.mv_data = NULL,
.mv_size = (data ? data->mv_size : 0) + sizeof(mdbx_attr_t)
};
int rc = mdbx_put(txn, dbi, key, &reserve, flags | MDB_RESERVE);
if (unlikely(rc != MDB_SUCCESS))
return rc;
return mdbx_attr_poke(&reserve, data, attr, flags);
}
int mdbx_cursor_put_attr(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
mdbx_attr_t attr, unsigned flags)
{
MDB_val reserve = {
.mv_data = NULL,
.mv_size = (data ? data->mv_size : 0) + sizeof(mdbx_attr_t)
};
int rc = mdbx_cursor_put(cursor, key, &reserve, flags | MDB_RESERVE);
if (unlikely(rc != MDB_SUCCESS))
return rc;
return mdbx_attr_poke(&reserve, data, attr, flags);
}
int mdbx_set_attr(MDB_txn *txn, MDB_dbi dbi,
MDB_val *key, MDB_val *data, mdbx_attr_t attr)
{
MDB_cursor mc;
MDB_xcursor mx;
MDB_val old_data;
mdbx_attr_t old_attr;
int rc;
if (unlikely(!key || !txn))
return EINVAL;
if (unlikely(txn->mt_signature != MDBX_MT_SIGNATURE))
return MDB_VERSION_MISMATCH;
if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)))
return EINVAL;
if (unlikely(txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_BLOCKED)))
return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
mdb_cursor_init(&mc, txn, dbi, &mx);
rc = mdb_cursor_set(&mc, key, &old_data, MDB_SET, NULL);
if (unlikely(rc != MDB_SUCCESS)) {
if (rc == MDB_NOTFOUND && data) {
mc.mc_next = txn->mt_cursors[dbi];
txn->mt_cursors[dbi] = &mc;
rc = mdbx_cursor_put_attr(&mc, key, data, attr, 0);
txn->mt_cursors[dbi] = mc.mc_next;
}
return rc;
}
rc = mdbx_attr_peek(&old_data, &old_attr);
if (unlikely(rc != MDB_SUCCESS))
return rc;
if (old_attr == attr && (!data ||
(data->mv_size == old_data.mv_size
&& memcpy(data->mv_data, old_data.mv_data, old_data.mv_size) == 0)))
return MDB_SUCCESS;
mc.mc_next = txn->mt_cursors[dbi];
txn->mt_cursors[dbi] = &mc;
rc = mdbx_cursor_put_attr(&mc, key, data ? data : &old_data, attr, MDB_CURRENT);
txn->mt_cursors[dbi] = mc.mc_next;
return rc;
}

124
mtest7.c Normal file
View File

@ -0,0 +1,124 @@
/* mtest7.c - memory-mapped database tester/toy */
/*
* Copyright 2015 Ilya Usvyatsky, Nexenta Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted only as authorized by the OpenLDAP
* Public License.
*
* A copy of this license is available in the file LICENSE in the
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>.
*/
/* Tests for DB attributes */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/time.h>
#include "mdbx.h"
#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr)
#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0))
#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \
"%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort()))
char dkbuf[1024];
#ifndef DBPATH
# define DBPATH "./testdb/data.mdb"
#endif
int main(int argc,char * argv[])
{
int i = 0, j = 0, rc;
MDB_env *env;
MDB_dbi dbi;
MDB_val key, data;
MDB_txn *txn;
MDB_stat mst;
int count;
int *values;
char sval[32];
uint64_t *timestamps, timestamp;
struct timeval tv;
int env_opt = MDB_NOMEMINIT | MDB_NOSYNC | MDB_NOSUBDIR | MDB_NORDAHEAD;
srand(time(NULL));
memset(sval, 0, sizeof(sval));
count = (rand()%384) + 64;
if (argc > 1)
count = atoi(argv[1]);
values = (int *)malloc(count*sizeof(int));
timestamps = (uint64_t *)calloc(count,sizeof(uint64_t));
unlink(DBPATH);
E(mdb_env_create(&env));
E(mdb_env_set_mapsize(env, 104857600));
E(mdb_env_set_maxdbs(env, 8));
E(mdb_env_open(env, DBPATH, env_opt, 0664));
E(mdb_txn_begin(env, NULL, 0, &txn));
E(mdb_dbi_open(txn, "id7", MDB_CREATE|MDB_INTEGERKEY, &dbi));
key.mv_size = sizeof(int);
data.mv_size = sizeof(sval);
data.mv_data = sval;
printf("Adding %d values\n", count);
for (i=0;i<count;i++) {
(void)gettimeofday(&tv, NULL);
timestamps[i] = tv.tv_usec + 1000000UL * tv.tv_sec;
values[i] = rand()%16383 ^ (timestamps[i] & 0xffff);
key.mv_data = values + i;
sprintf(sval, "%03x %d foo bar", values[i], values[i]);
E(mdbx_put_attr(txn, dbi, &key, &data, timestamps[i], MDB_NODUPDATA));
}
if (j) printf("%d duplicates skipped\n", j);
E(mdb_txn_commit(txn));
E(mdb_env_stat(env, &mst));
mdb_env_close(env);
E(mdb_env_create(&env));
E(mdb_env_set_mapsize(env, 10485760));
E(mdb_env_set_maxdbs(env, 8));
E(mdb_env_open(env, DBPATH, env_opt, 0664));
E(mdb_txn_begin(env, NULL, 0, &txn));
E(mdb_dbi_open(txn, "id7", MDB_CREATE|MDB_INTEGERKEY, &dbi));
for (i=0;!rc&&i<count;i++) {
if (!timestamps[i])
continue;
key.mv_data = values + i;
sprintf(sval, "%03x %d foo bar", values[i], values[i]);
E(mdbx_get_attr(txn, dbi, &key, &data, &timestamp));
if (timestamps[i] != timestamp) {
for (j = 0; j < count; ++j) {
if (j != i && values[i] == values[j] &&
timestamp == timestamps[j]) {
printf("Duplicate keys "
"%d %d %d %d %lu %lu\n",
i, j, values[i], values[j],
timestamps[i], timestamps[j]);
break;
}
}
if (j >= count) {
printf("Timestamp mismatch "
"%d %03x %d %lu != %lu\n",
i, values[i], values[i], timestamps[i],
timestamp);
break;
}
}
}
E(mdb_txn_commit(txn));
E(mdb_env_stat(env, &mst));
mdb_env_close(env);
return 0;
}

146
mtest8.c Normal file
View File

@ -0,0 +1,146 @@
/* mtest8.c - memory-mapped database tester/toy */
/*
* Copyright 2015 Ilya Usvyatsky, Nexenta Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted only as authorized by the OpenLDAP
* Public License.
*
* A copy of this license is available in the file LICENSE in the
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>.
*/
/* Tests for DB attributes */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/time.h>
#include "mdbx.h"
#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr)
#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0))
#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \
"%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort()))
char dkbuf[1024];
#ifndef DBPATH
# define DBPATH "./testdb/data.mdb"
#endif
int main(int argc,char * argv[])
{
int i = 0, rc;
MDB_env *env;
MDB_dbi dbi;
MDB_val key, data;
MDB_txn *txn;
MDB_stat mst;
int count;
int *values;
char sval[8000];
uint64_t *timestamps, timestamp;
struct timeval tv;
int env_opt = MDB_NOMEMINIT | MDB_NOSYNC | MDB_NOSUBDIR | MDB_NORDAHEAD;
srand(time(NULL));
memset(sval, 0, sizeof(sval));
count = 2000; //(rand()%384) + 64;
if (argc > 1)
count = atoi(argv[1]);
values = (int *)malloc(count*sizeof(int));
timestamps = (uint64_t *)calloc(count,sizeof(uint64_t));
key.mv_size = sizeof(int);
data.mv_size = sizeof(sval);
data.mv_data = sval;
values[0] = 42;
values[1] = 17;
for (i = 2; i < count; ++i)
values[i] = values[i - 1] + values[i - 2];
unlink(DBPATH);
E(mdb_env_create(&env));
E(mdb_env_set_mapsize(env, 104857600));
E(mdb_env_set_maxdbs(env, 8));
E(mdb_env_open(env, DBPATH, env_opt, 0664));
E(mdb_txn_begin(env, NULL, 0, &txn));
E(mdb_dbi_open(txn, "id8", MDB_CREATE|MDB_INTEGERKEY, &dbi));
for (i = 0; i < count; ++i) {
(void)gettimeofday(&tv, NULL);
timestamps[i] = tv.tv_usec + 1000000UL * tv.tv_sec;
snprintf(sval, 4000, "Value %d\n", values[i]);
snprintf(sval + 4000, 4000, "Value %d\n", values[i]);
key.mv_data = values + i;
E(mdbx_put_attr(txn, dbi, &key, &data, timestamps[i], MDB_NOOVERWRITE));
}
E(mdb_txn_commit(txn));
E(mdb_env_stat(env, &mst));
mdb_env_close(env);
E(mdb_env_create(&env));
E(mdb_env_set_mapsize(env, 10485760));
E(mdb_env_set_maxdbs(env, 8));
E(mdb_env_open(env, DBPATH, env_opt, 0664));
E(mdb_txn_begin(env, NULL, 0, &txn));
E(mdb_dbi_open(txn, "id8", MDB_INTEGERKEY, &dbi));
for (i = 0; i < count; ++i) {
key.mv_data = values + i;
E(mdbx_get_attr(txn, dbi, &key, &data, &timestamp));
E(timestamps[i] != timestamp);
}
E(mdb_txn_commit(txn));
E(mdb_env_stat(env, &mst));
mdb_env_close(env);
E(mdb_env_create(&env));
E(mdb_env_set_mapsize(env, 104857600));
E(mdb_env_set_maxdbs(env, 8));
E(mdb_env_open(env, DBPATH, env_opt, 0664));
E(mdb_txn_begin(env, NULL, 0, &txn));
E(mdb_dbi_open(txn, "id8", MDB_INTEGERKEY, &dbi));
for (i = 0; i < count; ++i) {
(void)gettimeofday(&tv, NULL);
timestamps[i] = tv.tv_usec + 1000000UL * tv.tv_sec;
key.mv_data = values + i;
E(mdbx_set_attr(txn, dbi, &key, NULL, timestamps[i]));
}
E(mdb_txn_commit(txn));
E(mdb_env_stat(env, &mst));
mdb_env_close(env);
E(mdb_env_create(&env));
E(mdb_env_set_mapsize(env, 10485760));
E(mdb_env_set_maxdbs(env, 8));
E(mdb_env_open(env, DBPATH, env_opt, 0664));
E(mdb_txn_begin(env, NULL, 0, &txn));
E(mdb_dbi_open(txn, "id8", MDB_INTEGERKEY, &dbi));
for (i = 0; i < count; ++i) {
key.mv_data = values + i;
E(mdbx_get_attr(txn, dbi, &key, &data, &timestamp));
E(timestamps[i] != timestamp);
}
E(mdb_txn_commit(txn));
E(mdb_env_stat(env, &mst));
mdb_env_close(env);
return 0;
}