From bf28856ac5009627877d019c1c0d89610383fea4 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Wed, 22 Jan 2020 03:43:09 +0300 Subject: [PATCH] mdbx: add key-making functions. Related to https://github.com/leo-yuriev/libmdbx/issues/76 Change-Id: I1edc8efd323af9adb53e6c2155e2ea39a1e575f4 --- mdbx.h | 35 ++++++++++-- src/elements/core.c | 132 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+), 5 deletions(-) diff --git a/mdbx.h b/mdbx.h index 959b92bf..eaef641b 100644 --- a/mdbx.h +++ b/mdbx.h @@ -298,12 +298,12 @@ * - optimize (bulk) loading speed * - (temporarily) reduce robustness to gain even more speed * - gather statistics about the database - * - define custom sort orders * - estimate size of range query result * - double perfomance by LIFO reclaiming on storages with write-back * - use sequences and canary markers * - use lack-of-space callback (aka OOM-KICK) * - use exclusive mode + * - define custom sort orders (but this is recommended to be avoided) * * **** RESTRICTIONS & CAVEATS *************************************************** @@ -2466,9 +2466,6 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, const MDBX_val *b); * In contrast to LMDB, the MDBX allow this function to be called from multiple * concurrent transactions or threads in the same process. * - * Legacy mdbx_dbi_open() correspond to calling mdbx_dbi_open_ex() with the null - * keycmp and datacmp arguments. - * * To use named database (with name != NULL), mdbx_env_set_maxdbs() * must be called before opening the environment. Table names are * keys in the internal unnamed database, and may be read but not written. @@ -2509,10 +2506,19 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, const MDBX_val *b); * Create the named database if it doesn't exist. This option is not * allowed in a read-only transaction or a read-only environment. * + * [out] dbi Address where the new MDBX_dbi handle will be stored. 
+ * + * For mdbx_dbi_open_ex() additional arguments allow you to set custom + * comparison functions for keys and values (for multimaps). + * However, I recommend not using custom comparison functions, but instead + * converting the keys to one of the forms that are suitable for built-in + * comparators. The main reason for this is that you can't use mdbx_chk tools + * with custom comparators. For instance, take a look at the mdbx_key_from_xxx() + * functions. + * * [in] keycmp Optional custom key comparison function for a database. * [in] datacmp Optional custom data comparison function for a database, takes * effect only if database was opened with the MDB_DUPSORT flag. - * [out] dbi Address where the new MDBX_dbi handle will be stored. * * Returns A non-zero error value on failure and 0 on success, some * possible errors are: @@ -2530,6 +2536,25 @@ LIBMDBX_API int mdbx_dbi_open_ex(MDBX_txn *txn, const char *name, LIBMDBX_API int mdbx_dbi_open(MDBX_txn *txn, const char *name, unsigned flags, MDBX_dbi *dbi); +/* Key-making functions to avoid custom comparators. + * + * The mdbx_key_from_jsonInteger() builds keys which are comparable with + * keys created by mdbx_key_from_double(). This allows mixing int64 and IEEE754 + * double values in one index for JSON-numbers with restriction for integer + * numbers range corresponding to RFC-7159 (i.e. [-(2**53)+1, (2**53)-1]). 
+ * See bottom of page 6 at https://tools.ietf.org/html/rfc7159 */
+LIBMDBX_API uint64_t mdbx_key_from_jsonInteger(const int64_t json_integer);
+LIBMDBX_API uint64_t mdbx_key_from_double(const double ieee754_64bit);
+LIBMDBX_API uint64_t mdbx_key_from_ptrdouble(const double *const ieee754_64bit);
+LIBMDBX_API uint32_t mdbx_key_from_float(const float ieee754_32bit);
+LIBMDBX_API uint32_t mdbx_key_from_ptrfloat(const float *const ieee754_32bit);
+__inline uint64_t mdbx_key_from_int64(const int64_t i64) {
+  return UINT64_C(0x8000000000000000) + i64;
+}
+__inline uint32_t mdbx_key_from_int32(const int32_t i32) {
+  return UINT32_C(0x80000000) + i32;
+}
+
 /* Retrieve statistics for a database.
  *
  * [in] txn A transaction handle returned by mdbx_txn_begin().
diff --git a/src/elements/core.c b/src/elements/core.c
index 6292be89..a4c53c74 100644
--- a/src/elements/core.c
+++ b/src/elements/core.c
@@ -17340,6 +17340,158 @@ __cold intptr_t mdbx_limits_txnsize_max(intptr_t pagesize) {
                                          : (intptr_t)MAX_MAPSIZE;
 }
 
+/*** Key-making functions to avoid custom comparators *************************/
+
+/* Converts an IEEE754 double into a uint64_t key whose unsigned ordering
+ * follows the numeric ordering of the source values: non-negative values are
+ * shifted above the 2**63 midpoint and negative values are mirrored below it,
+ * so that both +0.0 and -0.0 produce the midpoint itself. */
+static __always_inline uint64_t double2key(const double *const ptr) {
+  STATIC_ASSERT(sizeof(double) == sizeof(uint64_t));
+  /* Fetch the bits through a union, since dereferencing the pointer as an
+   * integer type would violate the strict-aliasing rule. */
+  union {
+    double ieee754;
+    uint64_t bits;
+  } pun;
+  pun.ieee754 = *ptr;
+  const uint64_t sign = UINT64_C(0x8000000000000000);
+  /* The negation is done in unsigned arithmetic, so -0.0 (sign bit only)
+   * wraps to the midpoint instead of overflowing a signed integer. */
+  return (pun.bits & sign) ? /* negative */ UINT64_C(0) - pun.bits
+                           : /* positive */ sign + pun.bits;
+}
+
+/* The 32-bit counterpart of double2key(): converts an IEEE754 float into
+ * a uint32_t key with the midpoint at 2**31. */
+static __always_inline uint32_t float2key(const float *const ptr) {
+  STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
+  union {
+    float ieee754;
+    uint32_t bits;
+  } pun;
+  pun.ieee754 = *ptr;
+  const uint32_t sign = UINT32_C(0x80000000);
+  /* Unsigned negation, see double2key(). */
+  return (pun.bits & sign) ? /* negative */ UINT32_C(0) - pun.bits
+                           : /* positive */ sign + pun.bits;
+}
+
+/* Public entry points: by-value and by-pointer forms of the converters. */
+uint64_t mdbx_key_from_double(const double ieee754_64bit) {
+  return double2key(&ieee754_64bit);
+}
+
+uint64_t mdbx_key_from_ptrdouble(const double *const ieee754_64bit) {
+  return double2key(ieee754_64bit);
+}
+
+uint32_t mdbx_key_from_float(const float ieee754_32bit) {
+  return float2key(&ieee754_32bit);
+}
+
+uint32_t mdbx_key_from_ptrfloat(const float *const ieee754_32bit) {
+  return float2key(ieee754_32bit);
+}
+
+#define IEEE754_DOUBLE_MANTISSA_SIZE 52
+#define IEEE754_DOUBLE_BIAS 0x3FF
+#define IEEE754_DOUBLE_MAX 0x7FF
+#define IEEE754_DOUBLE_IMPLICIT_LEAD UINT64_C(0x0010000000000000)
+#define IEEE754_DOUBLE_MANTISSA_MASK UINT64_C(0x000FFFFFFFFFFFFF)
+#define JSON_MAX_SAFE_INTEGER ((INT64_C(1) << 53) - 1)
+#define JSON_MIN_SAFE_INTEGER (-JSON_MAX_SAFE_INTEGER)
+
+/* Returns the number of leading zero bits of a 64-bit value.
+ * The value must be non-zero: neither the compiler builtins nor the
+ * _BitScanReverse intrinsics define a result for zero. */
+static __inline int clz64(uint64_t value) {
+#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_clzl)
+  if (sizeof(value) == sizeof(int))
+    return __builtin_clz(value);
+  if (sizeof(value) == sizeof(long))
+    return __builtin_clzl(value);
+#if (defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ == 8) ||            \
+    __has_builtin(__builtin_clzll)
+  return __builtin_clzll(value);
+#endif /* have(long long) && long long == uint64_t */
+#endif /* GNU C */
+
+#if defined(_MSC_VER)
+  /* _BitScanReverse yields the index of the highest set bit, which has to be
+   * turned into a count of the zero bits above it. */
+  unsigned long index;
+#if defined(_M_AMD64) || defined(_M_ARM64) || defined(_M_X64)
+  _BitScanReverse64(&index, value);
+  return 63 - (int)index;
+#else
+  if (value > UINT32_MAX) {
+    _BitScanReverse(&index, (uint32_t)(value >> 32));
+    return 31 - (int)index;
+  }
+  _BitScanReverse(&index, (uint32_t)value);
+  return 63 - (int)index;
+#endif
+#endif /* MSVC */
+
+  /* Portable fallback: smear the highest set bit downwards, then look the
+   * result up via a de Bruijn multiplication. */
+  value |= value >> 1;
+  value |= value >> 2;
+  value |= value >> 4;
+  value |= value >> 8;
+  value |= value >> 16;
+  value |= value >> 32;
+  static const uint8_t debruijn_clz64[64] = {
+      63, 16, 62, 7,  15, 36, 61, 3,  6,  14, 22, 26, 35, 47, 60, 2,
+      9,  5,  28, 11, 13, 21, 42, 19, 25, 31, 34, 40, 46, 52, 59, 1,
+      17, 8,  37, 4,  23, 27, 48, 10, 29, 12, 43, 20, 32, 41, 53, 18,
+      38, 24, 49, 30, 44, 33, 54, 39, 50, 45, 55, 51, 56, 57, 58, 0};
+  return debruijn_clz64[value * UINT64_C(0x03F79D71B4CB0A89) >> 58];
+}
+
+/* Builds a key for a JSON integer that is directly comparable with the keys
+ * made by mdbx_key_from_double().
+ *
+ * Integers within [JSON_MIN_SAFE_INTEGER, JSON_MAX_SAFE_INTEGER] are encoded
+ * exactly with integer arithmetic only. Anything outside of this range
+ * doesn't fit into the 53-bit significand, therefore it is converted to
+ * double first and takes the same path as mdbx_key_from_double(). */
+uint64_t mdbx_key_from_jsonInteger(const int64_t json_integer) {
+  const uint64_t biased_zero = UINT64_C(0x8000000000000000);
+  if (json_integer == 0)
+    return biased_zero;
+
+  if (unlikely(json_integer > JSON_MAX_SAFE_INTEGER ||
+               json_integer < JSON_MIN_SAFE_INTEGER)) {
+    /* Out of the safe range: the shift below would get a negative count. */
+    const double ieee754_64bit = (double)json_integer;
+    return double2key(&ieee754_64bit);
+  }
+
+  /* The negation can't overflow since the value is within the safe range. */
+  const uint64_t magnitude =
+      (json_integer > 0) ? (uint64_t)json_integer : (uint64_t)-json_integer;
+  /* Normalize: move the highest set bit to the implicit-lead position. */
+  const int extra_zeros =
+      clz64(magnitude) - (64 - IEEE754_DOUBLE_MANTISSA_SIZE - 1);
+  assert(extra_zeros >= 0);
+  const uint64_t mantissa = magnitude << extra_zeros;
+  assert(mantissa >= IEEE754_DOUBLE_IMPLICIT_LEAD);
+  assert(mantissa <=
+         IEEE754_DOUBLE_MANTISSA_MASK + IEEE754_DOUBLE_IMPLICIT_LEAD);
+  const uint64_t exponent =
+      IEEE754_DOUBLE_BIAS + IEEE754_DOUBLE_MANTISSA_SIZE - extra_zeros;
+  assert(exponent > 0 && exponent <= IEEE754_DOUBLE_MAX);
+  /* Exponent and fraction fields laid out as in the IEEE754 double. */
+  const uint64_t offset = (exponent << IEEE754_DOUBLE_MANTISSA_SIZE) +
+                          (mantissa - IEEE754_DOUBLE_IMPLICIT_LEAD);
+  const uint64_t key =
+      (json_integer > 0) ? biased_zero + offset : biased_zero - offset;
+  assert(key == mdbx_key_from_double((double)json_integer));
+  return key;
+}
+
 /*** Attribute support functions for Nexenta **********************************/
 
 #ifdef MDBX_NEXENTA_ATTRS