libmdbx/test/keygen.c++

463 lines
18 KiB
C++
Raw Normal View History

/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2024
/// \copyright SPDX-License-Identifier: Apache-2.0
#include "test.h++"
static const uint64_t primes[64] = {
/* */
0, 1, 3, 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381,
/* */
UINT64_C(32749), UINT64_C(65521), UINT64_C(131071), UINT64_C(262139),
UINT64_C(524287), UINT64_C(1048573), UINT64_C(2097143), UINT64_C(4194301),
UINT64_C(8388593), UINT64_C(16777213), UINT64_C(33554393),
UINT64_C(67108859), UINT64_C(134217689), UINT64_C(268435399),
UINT64_C(536870909), UINT64_C(1073741789), UINT64_C(2147483647),
UINT64_C(4294967291), UINT64_C(8589934583), UINT64_C(17179869143),
UINT64_C(34359738337), UINT64_C(68719476731), UINT64_C(137438953447),
UINT64_C(274877906899), UINT64_C(549755813881), UINT64_C(1099511627689),
UINT64_C(2199023255531), UINT64_C(4398046511093), UINT64_C(8796093022151),
UINT64_C(17592186044399), UINT64_C(35184372088777),
UINT64_C(70368744177643), UINT64_C(140737488355213),
UINT64_C(281474976710597), UINT64_C(562949953421231),
UINT64_C(1125899906842597), UINT64_C(2251799813685119),
UINT64_C(4503599627370449), UINT64_C(9007199254740881),
UINT64_C(18014398509481951), UINT64_C(36028797018963913),
UINT64_C(72057594037927931), UINT64_C(144115188075855859),
UINT64_C(288230376151711717), UINT64_C(576460752303423433),
UINT64_C(1152921504606846883), UINT64_C(2305843009213693951),
UINT64_C(4611686018427387847), UINT64_C(9223372036854775783)};
/* static unsigned supid_log2(uint64_t v) {
unsigned r = 0;
while (v > 1) {
v >>= 1;
r += 1;
}
return r;
} */
namespace keygen {
2017-05-17 20:10:56 +03:00
/* LY: https://en.wikipedia.org/wiki/Injective_function */
serial_t injective(const serial_t serial,
const unsigned bits /* at least serial_minwith (8) */,
const serial_t salt) {
assert(bits >= serial_minwith && bits <= serial_maxwith);
2017-05-17 20:10:56 +03:00
/* LY: All these "magic" prime numbers were found
* and verified with a bit of brute force. */
static const uint64_t m[64 - serial_minwith + 1] = {
2017-05-17 20:10:56 +03:00
/* 8 - 24 */
113, 157, 397, 653, 1753, 5641, 9697, 23873, 25693, 80833, 105953, 316937,
309277, 834497, 1499933, 4373441, 10184137,
/* 25 - 64 */
10184137, 17279209, 33990377, 67295161, 284404553, 1075238767, 6346721573,
6924051577, 19204053433, 45840188887, 53625693977, 73447827913,
141638870249, 745683604649, 1283334050489, 1100828289853, 2201656586197,
5871903036137, 11238507001417, 45264020802263, 105008404482889,
81921776907059, 199987980256399, 307207457507641, 946769023178273,
2420886491930041, 3601632139991929, 11984491914483833, 21805846439714153,
23171543400565993, 53353226456762893, 155627817337932409,
227827205384840249, 816509268558278821, 576933057762605689,
2623957345935638441, 5048241705479929949, 4634245581946485653,
4613509448041658233, 4952535426879925961};
static const uint8_t s[64 - serial_minwith + 1] = {
2017-05-17 20:10:56 +03:00
/* 8 - 24 */
2, 3, 4, 4, 2, 4, 3, 3, 7, 3, 3, 4, 8, 3, 10, 3, 11,
/* 25 - 64 */
11, 9, 9, 9, 11, 10, 5, 14, 11, 16, 14, 12, 13, 16, 19, 10, 10, 21, 7, 20,
10, 14, 22, 19, 3, 21, 18, 19, 26, 24, 2, 21, 25, 29, 24, 10, 11, 14, 20,
19};
2017-05-17 20:10:56 +03:00
const auto mask = actor_params::serial_mask(bits);
const auto mult = m[bits - 8];
const auto shift = s[bits - 8];
serial_t result = serial * mult;
2017-05-17 20:10:56 +03:00
if (salt) {
const unsigned left = bits / 2;
const unsigned right = bits - left;
result = (result << left) | ((result & mask) >> right);
result = (result ^ salt) * mult;
2017-05-17 20:10:56 +03:00
}
result ^= (result & mask) >> shift;
result &= mask;
log_trace("keygen-injective: serial %" PRIu64 "/%u @%" PRIx64 ",%u,%" PRIu64
" => %" PRIu64 "/%u",
serial, bits, mult, shift, salt, result, bits);
2017-05-17 20:10:56 +03:00
return result;
}
void __hot maker::pair(serial_t serial, const buffer &key, buffer &value,
serial_t value_age, const bool keylen_changeable) {
2017-05-17 20:10:56 +03:00
assert(mapping.width >= serial_minwith && mapping.width <= serial_maxwith);
assert(mapping.split <= mapping.width);
assert(mapping.mesh <= mapping.width);
assert(mapping.rotate <= mapping.width);
assert(mapping.offset <= actor_params::serial_mask(mapping.width));
assert(!(key_essentials.flags &
~(essentials::prng_fill_flag |
unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT))));
assert(!(value_essentials.flags &
~(essentials::prng_fill_flag |
unsigned(MDBX_INTEGERDUP | MDBX_REVERSEDUP | MDBX_DUPFIXED))));
2017-05-17 20:10:56 +03:00
log_trace("keygen-pair: serial %" PRIu64 ", data-age %" PRIu64, serial,
value_age);
if (mapping.mesh >= serial_minwith) {
serial = (serial & ~actor_params::serial_mask(mapping.mesh)) |
injective(serial, mapping.mesh, salt);
log_trace("keygen-pair: mesh@%u => %" PRIu64, mapping.mesh, serial);
2017-05-17 20:10:56 +03:00
}
if (mapping.rotate) {
const unsigned right = mapping.rotate;
const unsigned left = mapping.width - right;
serial = (serial << left) |
((serial & actor_params::serial_mask(mapping.width)) >> right);
log_trace("keygen-pair: rotate@%u => %" PRIu64 ", 0x%" PRIx64,
mapping.rotate, serial, serial);
2017-05-17 20:10:56 +03:00
}
if (mapping.offset) {
serial =
(serial + mapping.offset) & actor_params::serial_mask(mapping.width);
log_trace("keygen-pair: offset@%" PRIu64 " => %" PRIu64, mapping.offset,
serial);
}
if (base) {
serial += base;
log_trace("keygen-pair: base@%" PRIu64 " => %" PRIu64, base, serial);
}
2017-05-17 20:10:56 +03:00
serial_t key_serial = serial;
serial_t value_serial = (value_age & value_age_mask) << mapping.split;
2017-05-17 20:10:56 +03:00
if (mapping.split) {
if (MDBX_db_flags_t(key_essentials.flags) & MDBX_DUPSORT) {
key_serial >>= mapping.split;
value_serial += serial & actor_params::serial_mask(mapping.split);
} else {
/* Без MDBX_DUPSORT требуется уникальность ключей, а для этого нельзя
* отбрасывать какие-либо биты serial после инъективного преобразования.
* Поэтому key_serial не трогаем, а в value_serial нелинейно вмешиваем
* запрошенное количество бит из serial */
value_serial +=
(serial ^ (serial >> mapping.split) * UINT64_C(57035339200100753)) &
actor_params::serial_mask(mapping.split);
}
log_trace("keygen-pair: split@%u => k%" PRIu64 ", v%" PRIu64, mapping.split,
key_serial, value_serial);
2017-05-17 20:10:56 +03:00
}
log_trace("keygen-pair: key %" PRIu64 ", value %" PRIu64, key_serial,
value_serial);
key_serial = mk_begin(key_serial, key_essentials, *key);
value_serial = mk_begin(value_serial, value_essentials, *value);
#if 0 /* unused for now */
if (key->value.iov_len + value->value.iov_len > pair_maxlen) {
unsigned extra = key->value.iov_len + value->value.iov_len - pair_maxlen;
if (keylen_changeable &&
key->value.iov_len > std::max(8u, (unsigned)key_essentials.minlen)) {
#if defined(__GNUC__) || defined(__clang__)
const bool coin = __builtin_parityll(serial) != 0;
#else
const bool coin = INT64_C(0xF2CEECA9989BD96A) * int64_t(serial) < 0;
#endif
if (coin) {
const unsigned gap =
key->value.iov_len - std::max(8u, (unsigned)key_essentials.minlen);
const unsigned chop = std::min(gap, extra);
log_trace("keygen-pair: chop %u key-len %u -> %u", chop,
(unsigned)key->value.iov_len,
(unsigned)key->value.iov_len - chop);
key->value.iov_len -= chop;
extra -= chop;
}
}
if (extra && value->value.iov_len >
std::max(8u, (unsigned)value_essentials.minlen)) {
const unsigned gap = value->value.iov_len -
std::max(8u, (unsigned)value_essentials.minlen);
const unsigned chop = std::min(gap, extra);
log_trace("keygen-pair: chop %u value-len %u -> %u", chop,
(unsigned)value->value.iov_len,
(unsigned)value->value.iov_len - chop);
value->value.iov_len -= chop;
extra -= chop;
}
if (keylen_changeable && extra &&
key->value.iov_len > std::max(8u, (unsigned)key_essentials.minlen)) {
const unsigned gap =
key->value.iov_len - std::max(8u, (unsigned)key_essentials.minlen);
const unsigned chop = std::min(gap, extra);
log_trace("keygen-pair: chop %u key-len %u -> %u", chop,
(unsigned)key->value.iov_len,
(unsigned)key->value.iov_len - chop);
key->value.iov_len -= chop;
extra -= chop;
}
}
#else
(void)keylen_changeable;
#endif /* unused for now */
mk_continue(key_serial, key_essentials, *key);
mk_continue(value_serial, value_essentials, *value);
log_pair(logging::trace, "kv", key, value);
2017-05-17 20:10:56 +03:00
}
void maker::setup(const config::actor_params_pod &actor,
2017-05-17 20:10:56 +03:00
unsigned thread_number) {
#if CONSTEXPR_ENUM_FLAGS_OPERATIONS
static_assert(unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT |
MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP) <
UINT16_MAX,
"WTF?");
#else
assert(unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT |
MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP) <
UINT16_MAX);
#endif
key_essentials.flags = uint16_t(
actor.table_flags &
MDBX_db_flags_t(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT));
assert(actor.keylen_min <= UINT16_MAX);
key_essentials.minlen = uint16_t(actor.keylen_min);
assert(actor.keylen_max <= UINT32_MAX);
key_essentials.maxlen = std::min(
uint32_t(actor.keylen_max),
uint32_t(mdbx_limits_keysize_max(actor.pagesize, actor.table_flags)));
key_essentials.bits = (key_essentials.maxlen < sizeof(serial_t))
? key_essentials.maxlen * CHAR_BIT
: sizeof(serial_t) * CHAR_BIT;
key_essentials.mask = actor_params::serial_mask(key_essentials.bits);
assert(key_essentials.bits > 63 ||
key_essentials.mask > primes[key_essentials.bits]);
2017-05-17 20:10:56 +03:00
value_essentials.flags = uint16_t(
actor.table_flags &
MDBX_db_flags_t(MDBX_INTEGERDUP | MDBX_REVERSEDUP | MDBX_DUPFIXED));
assert(actor.datalen_min <= UINT16_MAX);
value_essentials.minlen = uint16_t(actor.datalen_min);
assert(actor.datalen_max <= UINT32_MAX);
value_essentials.maxlen = std::min(
uint32_t(actor.datalen_max),
uint32_t(mdbx_limits_valsize_max(actor.pagesize, actor.table_flags)));
value_essentials.bits = (value_essentials.maxlen < sizeof(serial_t))
? value_essentials.maxlen * CHAR_BIT
: sizeof(serial_t) * CHAR_BIT;
value_essentials.mask = actor_params::serial_mask(value_essentials.bits);
assert(value_essentials.bits > 63 ||
value_essentials.mask > primes[value_essentials.bits]);
2017-05-17 20:10:56 +03:00
if (!actor.keygen.zero_fill) {
key_essentials.flags |= essentials::prng_fill_flag;
value_essentials.flags |= essentials::prng_fill_flag;
}
2017-05-17 20:10:56 +03:00
mapping = actor.keygen;
const auto split = mapping.split;
while (mapping.split >
value_essentials.bits - essentials::value_age_minwidth ||
mapping.split >= mapping.width)
mapping.split -= 1;
if (split != mapping.split)
log_notice("keygen: reduce mapping-split from %u to %u", split,
mapping.split);
const auto width = mapping.width;
while (unsigned((actor.table_flags & MDBX_DUPSORT)
? mapping.width - mapping.split
: mapping.width) > key_essentials.bits)
mapping.width -= 1;
if (width != mapping.width)
log_notice("keygen: reduce mapping-width from %u to %u", width,
mapping.width);
value_age_bits = value_essentials.bits - mapping.split;
value_age_mask = actor_params::serial_mask(value_age_bits);
assert(value_age_bits >= essentials::value_age_minwidth);
2017-05-17 20:10:56 +03:00
salt = (prng_state ^
(thread_number * 1575554837) * UINT64_C(59386707711075671)) *
UINT64_C(14653293970879851569);
base = actor.serial_base();
}
bool maker::is_unordered() const {
return mapping.rotate ||
mapping.mesh > ((MDBX_db_flags_t(key_essentials.flags) & MDBX_DUPSORT)
? 0
: mapping.split);
}
void maker::seek2end(serial_t &serial) const {
serial = actor_params::serial_mask(mapping.width) - 1;
}
bool maker::increment(serial_t &serial, int64_t delta) const {
if (serial > actor_params::serial_mask(mapping.width)) {
2017-05-17 20:10:56 +03:00
log_extra("keygen-increment: %" PRIu64 " > %" PRIu64 ", overflow", serial,
actor_params::serial_mask(mapping.width));
2017-05-17 20:10:56 +03:00
return false;
}
serial_t target = serial + delta;
if (target > actor_params::serial_mask(mapping.width) ||
((delta > 0) ? target < serial : target > serial)) {
log_extra("keygen-increment: %" PRIu64 "%-" PRId64 " => %" PRIu64
", overflow",
2017-05-17 20:10:56 +03:00
serial, delta, target);
return false;
}
log_extra("keygen-increment: %" PRIu64 "%-" PRId64 " => %" PRIu64
", continue",
serial, delta, target);
2017-05-17 20:10:56 +03:00
serial = target;
return true;
}
//-----------------------------------------------------------------------------
MDBX_NOTHROW_PURE_FUNCTION static inline unsigned length(serial_t serial) {
#if defined(__clang__) && __clang__ > 8
unsigned n = 0;
2017-05-17 20:10:56 +03:00
if (serial > UINT32_MAX) {
n = 4;
serial >>= 32;
}
if (serial > UINT16_MAX) {
n += 2;
serial >>= 16;
}
if (serial > UINT8_MAX) {
n += 1;
serial >>= 8;
}
#else
unsigned n = (serial > UINT32_MAX) ? 4 : 0;
serial = (serial > UINT32_MAX) ? serial >> 32 : serial;
n += (serial > UINT16_MAX) ? 2 : 0;
serial = (serial > UINT16_MAX) ? serial >> 16 : serial;
n += (serial > UINT8_MAX);
serial = (serial > UINT8_MAX) ? serial >> 8 : serial;
#endif
return n + (serial > 0);
2017-05-17 20:10:56 +03:00
}
buffer alloc(size_t limit) {
result *ptr = (result *)malloc(sizeof(result) + limit + 8);
2017-05-17 20:10:56 +03:00
if (unlikely(ptr == nullptr))
failure_perror("malloc(keyvalue_buffer)", errno);
ptr->value.iov_base = ptr->bytes;
ptr->value.iov_len = 0;
ptr->limit = limit + 8;
2017-05-17 20:10:56 +03:00
return buffer(ptr);
}
serial_t __hot maker::mk_begin(serial_t serial, const essentials &params,
result &out) {
assert(out.limit >= params.maxlen);
assert(params.maxlen >= params.minlen);
assert(serial <= params.mask);
if (unlikely(serial > params.mask)) {
#if 1
serial %= primes[params.bits];
assert(params.mask > primes[params.bits]);
#else
const serial_t maxbits = params.maxlen * CHAR_BIT;
serial ^= (serial >> maxbits / 2) *
serial_t((sizeof(serial_t) > 4) ? UINT64_C(40719303417517073)
: UINT32_C(3708688457));
serial &= params.mask;
#endif
assert(params.maxlen >= length(serial));
}
out.value.iov_len = std::max(unsigned(params.minlen), length(serial));
const auto variation = params.maxlen - params.minlen;
if (variation) {
if (serial % (variation + serial_t(1))) {
auto refix = serial * UINT64_C(48835288005252737);
refix ^= refix >> 32;
out.value.iov_len =
std::max(out.value.iov_len,
params.minlen + size_t(1) + size_t(refix) % variation);
}
}
assert(length(serial) <= out.value.iov_len);
assert(out.value.iov_len >= params.minlen);
assert(out.value.iov_len <= params.maxlen);
return serial;
}
void __hot maker::mk_continue(const serial_t serial, const essentials &params,
result &out) {
#if CONSTEXPR_ENUM_FLAGS_OPERATIONS
static_assert(
(essentials::prng_fill_flag &
unsigned(MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERKEY |
MDBX_INTEGERDUP | MDBX_REVERSEKEY | MDBX_REVERSEDUP)) == 0,
"WTF?");
#else
assert((essentials::prng_fill_flag &
unsigned(MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERKEY |
MDBX_INTEGERDUP | MDBX_REVERSEKEY | MDBX_REVERSEDUP)) == 0);
#endif
assert(length(serial) <= out.value.iov_len);
out.value.iov_base = out.bytes;
if (MDBX_db_flags_t(params.flags) & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) {
assert(params.maxlen == params.minlen);
if (MDBX_db_flags_t(params.flags) & (MDBX_INTEGERKEY | MDBX_INTEGERDUP))
assert(params.minlen == 4 || params.minlen == 8);
out.u64 = serial;
if (!is_byteorder_le() && out.value.iov_len != 8)
out.u32 = uint32_t(serial);
} else {
const auto prefix =
std::max(std::min(unsigned(params.minlen), 8u), length(serial));
out.u64 = htobe64(serial);
out.value.iov_base = out.bytes + 8 - prefix;
if (out.value.iov_len > prefix) {
if (params.flags & essentials::prng_fill_flag) {
uint64_t state = serial ^ UINT64_C(0x923ab47b7ee6f6e4);
prng_fill(state, out.bytes + 8, out.value.iov_len - prefix);
} else
memset(out.bytes + 8, '\0', out.value.iov_len - prefix);
}
if (unlikely(MDBX_db_flags_t(params.flags) &
(MDBX_REVERSEKEY | MDBX_REVERSEDUP)))
std::reverse((char *)out.value.iov_base,
(char *)out.value.iov_base + out.value.iov_len);
}
2017-05-23 21:05:54 +03:00
assert(out.value.iov_len >= params.minlen);
assert(out.value.iov_len <= params.maxlen);
assert(out.value.iov_len >= length(serial));
assert(out.value.iov_base >= out.bytes);
assert((char *)out.value.iov_base + out.value.iov_len <=
(char *)&out.bytes + out.limit);
}
void log_pair(logging::loglevel level, const char *prefix, const buffer &key,
buffer &value) {
if (log_enabled(level)) {
char dump_key[4096], dump_value[4096];
logging::output(
level, "%s-pair: key %s, value %s", prefix,
mdbx_dump_val(&key->value, dump_key, sizeof(dump_key)),
mdbx_dump_val(&value->value, dump_value, sizeof(dump_value)));
}
}
} /* namespace keygen */