mdbx: add key-making functions.

Related to https://github.com/leo-yuriev/libmdbx/issues/76

Change-Id: I1edc8efd323af9adb53e6c2155e2ea39a1e575f4
This commit is contained in:
Leonid Yuriev 2020-01-22 03:43:09 +03:00
parent 700f3514b3
commit bf28856ac5
2 changed files with 162 additions and 5 deletions

35
mdbx.h
View File

@ -298,12 +298,12 @@
* - optimize (bulk) loading speed
* - (temporarily) reduce robustness to gain even more speed
* - gather statistics about the database
* - define custom sort orders
* - estimate size of range query result
* - double performance by LIFO reclaiming on storages with write-back
* - use sequences and canary markers
* - use lack-of-space callback (aka OOM-KICK)
* - use exclusive mode
* - define custom sort orders (but this is recommended to be avoided)
*
*
**** RESTRICTIONS & CAVEATS ***************************************************
@ -2466,9 +2466,6 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, const MDBX_val *b);
* In contrast to LMDB, the MDBX allow this function to be called from multiple
* concurrent transactions or threads in the same process.
*
* Legacy mdbx_dbi_open() correspond to calling mdbx_dbi_open_ex() with the null
* keycmp and datacmp arguments.
*
* To use named database (with name != NULL), mdbx_env_set_maxdbs()
* must be called before opening the environment. Table names are
* keys in the internal unnamed database, and may be read but not written.
@ -2509,10 +2506,19 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, const MDBX_val *b);
* Create the named database if it doesn't exist. This option is not
* allowed in a read-only transaction or a read-only environment.
*
* [out] dbi Address where the new MDBX_dbi handle will be stored.
*
* For mdbx_dbi_open_ex() additional arguments allow you to set custom
* comparison functions for keys and values (for multimaps).
* However, I recommend not using custom comparison functions, but instead
* converting the keys to one of the forms that are suitable for built-in
* comparators. The main reason for this is that you can't use the mdbx_chk
* tool with custom comparators. For instance, take a look at the
* mdbx_key_from_xxx() functions.
*
* [in] keycmp Optional custom key comparison function for a database.
* [in] datacmp Optional custom data comparison function for a database, takes
* effect only if database was opened with the MDB_DUPSORT flag.
* [out] dbi Address where the new MDBX_dbi handle will be stored.
*
* Returns A non-zero error value on failure and 0 on success, some
* possible errors are:
@ -2530,6 +2536,25 @@ LIBMDBX_API int mdbx_dbi_open_ex(MDBX_txn *txn, const char *name,
LIBMDBX_API int mdbx_dbi_open(MDBX_txn *txn, const char *name, unsigned flags,
MDBX_dbi *dbi);
/* Key-making functions to avoid custom comparators.
 *
 * Each function converts a source value into an unsigned integer that is
 * meant to be stored as a key and ordered by the built-in comparators
 * instead of a custom comparison function.
 *
 * The mdbx_key_from_jsonInteger() builds keys which are comparable with
 * keys created by mdbx_key_from_double(). This allows mixing int64 and
 * IEEE754 double values in one index for JSON numbers, with the restriction
 * that integer numbers stay within the range given by RFC-7159,
 * i.e. [-(2**53)+1, (2**53)-1].
 * See the bottom of page 6 at https://tools.ietf.org/html/rfc7159
 *
 * The mdbx_key_from_ptrdouble() and mdbx_key_from_ptrfloat() variants take
 * the source value by pointer rather than by value. */
LIBMDBX_API uint64_t mdbx_key_from_jsonInteger(const int64_t json_integer);
LIBMDBX_API uint64_t mdbx_key_from_double(const double ieee754_64bit);
LIBMDBX_API uint64_t mdbx_key_from_ptrdouble(const double *const ieee754_64bit);
LIBMDBX_API uint32_t mdbx_key_from_float(const float ieee754_32bit);
LIBMDBX_API uint32_t mdbx_key_from_ptrfloat(const float *const ieee754_32bit);
/* Makes a 64-bit unsigned key from a signed 64-bit integer.
 *
 * Adds a bias of 2**63 (with unsigned wrap-around), so INT64_MIN maps to 0
 * and INT64_MAX maps to UINT64_MAX; the unsigned order of the keys therefore
 * matches the signed order of the source values.
 *
 * Declared `static` because this definition lives in a public header:
 * a non-static inline definition would either require a separate external
 * definition (C99/C11 inline semantics) or be emitted in every translation
 * unit as a duplicate symbol (gnu89 semantics). */
static __inline uint64_t mdbx_key_from_int64(const int64_t i64) {
  return UINT64_C(0x8000000000000000) + (uint64_t)i64;
}
/* Makes a 32-bit unsigned key from a signed 32-bit integer.
 *
 * Adds a bias of 2**31 (with unsigned wrap-around), so INT32_MIN maps to 0
 * and INT32_MAX maps to UINT32_MAX; the unsigned order of the keys therefore
 * matches the signed order of the source values.
 *
 * Declared `static` because this definition lives in a public header:
 * a non-static inline definition would either require a separate external
 * definition (C99/C11 inline semantics) or be emitted in every translation
 * unit as a duplicate symbol (gnu89 semantics). */
static __inline uint32_t mdbx_key_from_int32(const int32_t i32) {
  return UINT32_C(0x80000000) + (uint32_t)i32;
}
/* Retrieve statistics for a database.
*
* [in] txn A transaction handle returned by mdbx_txn_begin().

View File

@ -17340,6 +17340,138 @@ __cold intptr_t mdbx_limits_txnsize_max(intptr_t pagesize) {
: (intptr_t)MAX_MAPSIZE;
}
/*** Key-making functions to avoid custom comparators *************************/
/* Converts the bit pattern of an IEEE754 binary64 value into a 64-bit key.
 *
 * Values with the sign bit clear are shifted up by 2**63; values with the
 * sign bit set are mapped to (2**63 - magnitude bits), so a larger magnitude
 * of a negative value yields a smaller key. Both +0.0 and -0.0 produce 2**63.
 *
 * The bits are obtained through a union rather than by dereferencing a
 * casted pointer (which breaks the strict aliasing rule), and the negation
 * is done in unsigned arithmetic, since negating the signed pattern of -0.0
 * (i.e. INT64_MIN) would be a signed overflow. */
static __always_inline uint64_t double2key(const double *const ptr) {
  STATIC_ASSERT(sizeof(double) == sizeof(int64_t));
  union {
    double f64;
    uint64_t u64;
  } pun;
  pun.f64 = *ptr;
  const uint64_t sign_bit = UINT64_C(0x8000000000000000);
  return (pun.u64 & sign_bit) ? /* negative */ UINT64_C(0) - pun.u64
                              : /* positive */ sign_bit + pun.u64;
}
/* Converts the bit pattern of an IEEE754 binary32 value into a 32-bit key.
 *
 * Values with the sign bit clear are shifted up by 2**31; values with the
 * sign bit set are mapped to (2**31 - magnitude bits), so a larger magnitude
 * of a negative value yields a smaller key. Both +0.0f and -0.0f produce
 * 2**31.
 *
 * The bits are obtained through a union rather than by dereferencing a
 * casted pointer (which breaks the strict aliasing rule), and the negation
 * is done in unsigned arithmetic, since negating the signed pattern of -0.0f
 * (i.e. INT32_MIN) would be a signed overflow. */
static __always_inline uint32_t float2key(const float *const ptr) {
  STATIC_ASSERT(sizeof(float) == sizeof(int32_t));
  union {
    float f32;
    uint32_t u32;
  } pun;
  pun.f32 = *ptr;
  const uint32_t sign_bit = UINT32_C(0x80000000);
  return (pun.u32 & sign_bit) ? /* negative */ UINT32_C(0) - pun.u32
                              : /* positive */ sign_bit + pun.u32;
}
/* Public entry point: makes a 64-bit key from a double passed by value
 * by handing the address of the argument to double2key(). */
uint64_t mdbx_key_from_double(const double ieee754_64bit) {
  const uint64_t key = double2key(&ieee754_64bit);
  return key;
}
/* Public entry point: makes a 64-bit key from a double passed by pointer;
 * the pointer is forwarded to double2key() as is. */
uint64_t mdbx_key_from_ptrdouble(const double *const ieee754_64bit) {
  const uint64_t key = double2key(ieee754_64bit);
  return key;
}
/* Public entry point: makes a 32-bit key from a float passed by value
 * by handing the address of the argument to float2key(). */
uint32_t mdbx_key_from_float(const float ieee754_32bit) {
  const uint32_t key = float2key(&ieee754_32bit);
  return key;
}
/* Public entry point: makes a 32-bit key from a float passed by pointer;
 * the pointer is forwarded to float2key() as is. */
uint32_t mdbx_key_from_ptrfloat(const float *const ieee754_32bit) {
  const uint32_t key = float2key(ieee754_32bit);
  return key;
}
/* Layout constants of the IEEE754 binary64 ("double") format, used below
 * to assemble a double's bit pattern directly from an integer. */
/* Number of explicitly stored mantissa (fraction) bits. */
#define IEEE754_DOUBLE_MANTISSA_SIZE 52
/* Bias which is added to the exponent when it is stored. */
#define IEEE754_DOUBLE_BIAS 0x3FF
/* Upper bound for the stored (biased) exponent field. */
#define IEEE754_DOUBLE_MAX 0x7FF
/* The leading one of a normalized mantissa (bit 52), which is implied
 * and therefore not stored. */
#define IEEE754_DOUBLE_IMPLICIT_LEAD UINT64_C(0x0010000000000000)
/* Mask covering the 52 stored mantissa bits. */
#define IEEE754_DOUBLE_MANTISSA_MASK UINT64_C(0x000FFFFFFFFFFFFF)
/* Bounds of the integer range [-(2**53)+1, (2**53)-1]; integers within it
 * fit into the 53-bit mantissa (implicit lead included) without rounding. */
#define JSON_MAX_SAFE_INTEGER ((INT64_C(1) << 53) - 1)
#define JSON_MIN_SAFE_INTEGER (-JSON_MAX_SAFE_INTEGER)
/* Counts the leading zero bits of a 64-bit value.
 *
 * Uses a compiler builtin or intrinsic when one is available and falls back
 * to a portable bit-smearing + de Bruijn table lookup otherwise.
 *
 * NOTE: the builtin/intrinsic paths leave the result undefined for a zero
 * argument (the portable path returns 63), so callers must pass non-zero. */
static __inline int clz64(uint64_t value) {
#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_clzl)
  if (sizeof(value) == sizeof(int))
    return __builtin_clz(value);
  if (sizeof(value) == sizeof(long))
    return __builtin_clzl(value);
#if (defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ == 8) ||            \
    __has_builtin(__builtin_clzll)
  return __builtin_clzll(value);
#endif /* have(long long) && long long == uint64_t */
#endif /* GNU C */

#if defined(_MSC_VER)
  /* _BitScanReverse*() yield the INDEX of the most significant set bit
   * (0 for the lowest bit), so it has to be converted into a count of
   * leading zeros rather than returned as is. */
  unsigned long index;
#if defined(_M_AMD64) || defined(_M_ARM64) || defined(_M_X64)
  _BitScanReverse64(&index, value);
  return 63 - (int)index;
#else
  if (value > UINT32_MAX) {
    /* the most significant set bit is in the high 32-bit half */
    _BitScanReverse(&index, (uint32_t)(value >> 32));
    return 31 - (int)index;
  }
  /* the high half is empty: 32 zeros plus the zeros of the low half */
  _BitScanReverse(&index, (uint32_t)value);
  return 63 - (int)index;
#endif
#else  /* MSVC */
  /* Portable fallback: smear the most significant set bit over all lower
   * bits, then use the de Bruijn multiplication to pick the answer from
   * the table. */
  value |= value >> 1;
  value |= value >> 2;
  value |= value >> 4;
  value |= value >> 8;
  value |= value >> 16;
  value |= value >> 32;
  static const uint8_t debruijn_clz64[64] = {
      63, 16, 62, 7,  15, 36, 61, 3,  6,  14, 22, 26, 35, 47, 60, 2,
      9,  5,  28, 11, 13, 21, 42, 19, 25, 31, 34, 40, 46, 52, 59, 1,
      17, 8,  37, 4,  23, 27, 48, 10, 29, 12, 43, 20, 32, 41, 53, 18,
      38, 24, 49, 30, 44, 33, 54, 39, 50, 45, 55, 51, 56, 57, 58, 0};
  return debruijn_clz64[value * UINT64_C(0x03F79D71B4CB0A89) >> 58];
#endif /* MSVC */
}
/* Makes a 64-bit key from an integer JSON number, comparable with the keys
 * produced by mdbx_key_from_double().
 *
 * For integers within [JSON_MIN_SAFE_INTEGER, JSON_MAX_SAFE_INTEGER] the
 * IEEE754 binary64 bit pattern is assembled directly with integer
 * operations: such values fit into the 53-bit mantissa, so no rounding is
 * needed. Integers outside of this range are converted through the
 * (double) cast and double2key().
 *
 * Returns 2**63 for zero, a greater key for positive values and a lesser
 * key for negative ones. */
uint64_t mdbx_key_from_jsonInteger(const int64_t json_integer) {
  const uint64_t biased_zero = UINT64_C(0x8000000000000000);
  if (json_integer == 0)
    return biased_zero;

  /* Out of the safe range the magnitude does not fit into the mantissa
   * (the shift below would get a negative count), so let the compiler's
   * int-to-double conversion do the rounding. This check also keeps
   * INT64_MIN away from the negation below. */
  if (unlikely(json_integer > JSON_MAX_SAFE_INTEGER ||
               json_integer < JSON_MIN_SAFE_INTEGER)) {
    const double ieee754_64bit = (double)json_integer;
    return double2key(&ieee754_64bit);
  }

  const int is_positive = json_integer > 0;
  const uint64_t magnitude =
      is_positive ? (uint64_t)json_integer : (uint64_t)-json_integer;

  /* Normalize: shift the most significant set bit of the magnitude to
   * the position of the implicit lead (bit 52). */
  const int extra_zeros =
      clz64(magnitude) - (64 - IEEE754_DOUBLE_MANTISSA_SIZE - 1);
  assert(extra_zeros >= 0);
  const uint64_t mantissa = magnitude << extra_zeros;
  assert(mantissa >= IEEE754_DOUBLE_IMPLICIT_LEAD);
  assert(mantissa <=
         IEEE754_DOUBLE_MANTISSA_MASK + IEEE754_DOUBLE_IMPLICIT_LEAD);
  const uint64_t exponent =
      IEEE754_DOUBLE_BIAS + IEEE754_DOUBLE_MANTISSA_SIZE - extra_zeros;
  assert(exponent > 0 && exponent <= IEEE754_DOUBLE_MAX);

  /* Bit pattern of the absolute value: exponent field + stored mantissa. */
  const uint64_t abs_bits = (exponent << IEEE754_DOUBLE_MANTISSA_SIZE) +
                            (mantissa - IEEE754_DOUBLE_IMPLICIT_LEAD);
  const uint64_t key =
      is_positive ? biased_zero + abs_bits : biased_zero - abs_bits;
  assert(key == mdbx_key_from_double((double)json_integer));
  return key;
}
/*** Attribute support functions for Nexenta **********************************/
#ifdef MDBX_NEXENTA_ATTRS