mdbx++: переделка поддержки base58 по RFC-draft (backport).

This commit is contained in:
Леонид Юрьев (Leonid Yuriev) 2023-11-24 08:49:43 +03:00
parent 7fc6a1b658
commit 1277fe965d
2 changed files with 204 additions and 217 deletions

View File

@ -1325,8 +1325,7 @@ struct LIBMDBX_API to_base58 {
/// \brief Returns the buffer size in bytes needed for /// \brief Returns the buffer size in bytes needed for
/// [Base58](https://en.wikipedia.org/wiki/Base58) dump of passed slice. /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of passed slice.
MDBX_CXX11_CONSTEXPR size_t envisage_result_length() const noexcept { MDBX_CXX11_CONSTEXPR size_t envisage_result_length() const noexcept {
const size_t bytes = const size_t bytes = (source.length() * 11 + 7) / 8;
source.length() / 8 * 11 + (source.length() % 8 * 43 + 31) / 32;
return wrap_width ? bytes + bytes / wrap_width : bytes; return wrap_width ? bytes + bytes / wrap_width : bytes;
} }
@ -1491,7 +1490,7 @@ struct LIBMDBX_API from_base58 {
/// [Base58](https://en.wikipedia.org/wiki/Base58) dump from a passed slice to /// [Base58](https://en.wikipedia.org/wiki/Base58) dump from a passed slice to
/// decoded data. /// decoded data.
MDBX_CXX11_CONSTEXPR size_t envisage_result_length() const noexcept { MDBX_CXX11_CONSTEXPR size_t envisage_result_length() const noexcept {
return source.length() / 11 * 8 + source.length() % 11 * 32 / 43; return source.length() /* могут быть все нули кодируемые один-к-одному */;
} }
/// \brief Fills the destination with data decoded from /// \brief Fills the destination with data decoded from

View File

@ -207,6 +207,44 @@ __cold bug::~bug() noexcept {}
#endif /* Unused*/ #endif /* Unused*/
/// \brief Sequential writer into a caller-provided character buffer that
/// optionally breaks the output into lines of `wrap_width` characters.
///
/// `line` points at the first character of the line currently being filled,
/// `ptr` at the next position to be written. No bounds checking is performed
/// here: the caller is responsible for the buffer being large enough.
struct line_wrapper {
  char *line, *ptr;

  line_wrapper(char *buf) noexcept : line(buf), ptr(buf) {}

  /// \brief Appends one character; when wrapping is enabled (non-zero
  /// `wrap_width`) and the current line has reached `wrap_width` characters,
  /// a '\n' is appended and a new line is started.
  void put(char c, size_t wrap_width) noexcept {
    *ptr = c;
    ++ptr;
    if (wrap_width == 0 || ptr < line + wrap_width)
      return;
    *ptr = '\n';
    ++ptr;
    line = ptr;
  }

  /// \brief Appends a whole chunk. It is copied in one piece when wrapping is
  /// disabled or the chunk fits strictly inside the current line; otherwise
  /// it is fed character by character so that line breaks get inserted.
  void put(const ::mdbx::slice &chunk, size_t wrap_width) noexcept {
    const size_t length = chunk.length();
    if (wrap_width && wrap_width <= (ptr - line) + length) {
      const auto text = chunk.char_ptr();
      for (size_t i = 0; i != length; ++i)
        put(text[i], wrap_width);
      return;
    }
    memcpy(ptr, chunk.data(), length);
    ptr += length;
  }
};
/// \brief Zero-filled scratch array of `TYPE` elements that lives inside the
/// object itself when small and on the heap when large.
///
/// \tparam TYPE           Element type; it is cleared with memset(), so it
///                        must be valid when all its bytes are zero.
/// \tparam INPLACE_BYTES  Capacity of the embedded storage in bytes
///                        (rounded up to a whole number of elements).
///
/// `size` is the number of elements (the requested byte count rounded up),
/// `area` points at the first element and `end()` one past the last.
///
/// The object is non-copyable: `area` either owns a `new[]` allocation or
/// points at this object's own `inplace` member, so a member-wise copy would
/// end in a double `delete[]` or in a pointer into another object's storage.
template <typename TYPE, unsigned INPLACE_BYTES = unsigned(sizeof(void *) * 64)>
struct temp_buffer {
  TYPE inplace[(INPLACE_BYTES + sizeof(TYPE) - 1) / sizeof(TYPE)];
  const size_t size;
  TYPE *const area;

  /// \param bytes Requested capacity in bytes; the heap is used only when it
  ///              exceeds the embedded storage. `new[]` may throw.
  temp_buffer(size_t bytes)
      : size((bytes + sizeof(TYPE) - 1) / sizeof(TYPE)),
        area((bytes > sizeof(inplace)) ? new TYPE[size] : inplace) {
    memset(area, 0, sizeof(TYPE) * size);
  }

  temp_buffer(const temp_buffer &) = delete;
  temp_buffer &operator=(const temp_buffer &) = delete;

  ~temp_buffer() {
    // Only the heap variant owns memory that has to be released.
    if (area != inplace)
      delete[] area;
  }

  TYPE *end() const { return area + size; }
};
} // namespace } // namespace
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
@ -709,156 +747,135 @@ enum : signed char {
IL /* invalid */ = -1 IL /* invalid */ = -1
}; };
static const byte b58_alphabet[58] = { #if MDBX_WORDBITS > 32
using b58_uint = uint_fast64_t;
#else
using b58_uint = uint_fast32_t;
#endif
/// \brief Scratch buffer of `b58_uint` words for the Base58 conversions,
/// sized from the input length and an estimated growth/shrink ratio.
struct b58_buffer : public temp_buffer<b58_uint> {
  /// \param bytes                         Length of the data to convert.
  /// \param estimation_ratio_numerator    Numerator of the size ratio.
  /// \param estimation_ratio_denominator  Denominator of the size ratio.
  /// \param extra                         Additional amount added before the
  ///                                      final scaling.
  b58_buffer(size_t bytes, size_t estimation_ratio_numerator,
             size_t estimation_ratio_denominator, size_t extra = 0)
      : temp_buffer(reservation(bytes, estimation_ratio_numerator,
                                estimation_ratio_denominator, extra)) {}

private:
  /// \brief Computes the byte count handed over to temp_buffer.
  static size_t reservation(size_t bytes, size_t numerator,
                            size_t denominator, size_t extra) noexcept {
    // Rescale the length by the given ratio, rounding up.
    const size_t scaled = (bytes * numerator + denominator - 1) / denominator;
    // Account for the reserved most significant byte of every word: only
    // sizeof(b58_uint) - 1 bytes of payload are counted per word.
    const size_t payload_per_word = sizeof(b58_uint) - 1;
    const size_t padded =
        (scaled + payload_per_word - 1) / payload_per_word * sizeof(b58_uint);
    // NOTE(review): the result is scaled by sizeof(b58_uint) once more, as in
    // the original expression; presumably this provides the slack that the
    // in-place text output of the encoder relies on - confirm.
    return (padded + extra) * sizeof(b58_uint);
  }
};
static byte b58_8to11(b58_uint &v) noexcept {
static const char b58_alphabet[58] = {
'1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'}; 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};
#ifndef bswap64 const auto i = size_t(v % 58);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
static inline uint64_t bswap64(uint64_t v) noexcept {
#if __GNUC_PREREQ(4, 4) || __CLANG_PREREQ(4, 0) || \
__has_builtin(__builtin_bswap64)
return __builtin_bswap64(v);
#elif defined(_MSC_VER) && !defined(__clang__)
return _byteswap_uint64(v);
#elif defined(__bswap_64)
return __bswap_64(v);
#elif defined(bswap_64)
return bswap_64(v);
#else
return v << 56 | v >> 56 | ((v << 40) & UINT64_C(0x00ff000000000000)) |
((v << 24) & UINT64_C(0x0000ff0000000000)) |
((v << 8) & UINT64_C(0x000000ff00000000)) |
((v >> 8) & UINT64_C(0x00000000ff000000)) |
((v >> 24) & UINT64_C(0x0000000000ff0000)) |
((v >> 40) & UINT64_C(0x000000000000ff00));
#endif
}
#endif /* __BYTE_ORDER__ */
#endif /* ifndef bswap64 */
static inline char b58_8to11(uint64_t &v) noexcept {
const unsigned i = unsigned(v % 58);
v /= 58; v /= 58;
return b58_alphabet[i]; return b58_alphabet[i];
} }
/// \brief Converts the bytes [begin, end) into Base58 characters.
///
/// The input is treated as one big number. It is accumulated byte by byte
/// into the words of `buf` (most significant word first, least significant
/// word at `buf.end() - 1`), each word holding a value below a power of 58.
/// The words are then expanded into characters which are written over the
/// beginning of the same buffer.
///
/// \returns A slice of characters located inside `buf`, with leading '1'
///          characters (zero digits) stripped. It is valid only while `buf`
///          is alive.
static slice b58_encode(b58_buffer &buf, const byte *begin, const byte *end) {
  static_assert(sizeof(b58_uint) == 4 || sizeof(b58_uint) == 8, "WTF?");
  // A 64-bit word carries nine base58 digits, a 32-bit word carries four.
  const size_t digits_per_word = (sizeof(b58_uint) > 4) ? 9 : 4;
  const auto radix =
      b58_uint((sizeof(b58_uint) > 4) ? UINT64_C(0x1A636A90B07A00) /* 58^9 */
                                      : UINT32_C(0xACAD10) /* 58^4 */);

  // Multiply the accumulated number by 256 and add the next byte, walking
  // from the least significant word up to the highest word used so far.
  b58_uint *top = buf.end();
  for (; begin < end; ++begin) {
    b58_uint carry = *begin;
    b58_uint *word = buf.end();
    do {
      assert(word > buf.area);
      --word;
      carry += *word << CHAR_BIT;
      *word = carry % radix;
      carry /= radix;
    } while (carry || word > top);
    top = word;
  }

  // Expand every used word into its characters. The text is written from
  // the start of the buffer and must stay behind the word being read.
  byte *const text = static_cast<byte *>(static_cast<void *>(buf.area));
  byte *out = text;
  for (b58_uint *word = top; word < buf.end();) {
    b58_uint value = *word++;
    assert(value < radix);
    // b58_8to11() yields the lowest digit first, so fill from the back.
    for (size_t i = digits_per_word; i > 0; --i)
      out[i - 1] = b58_8to11(value);
    out += digits_per_word;
    assert(static_cast<void *>(out) < static_cast<void *>(word));
  }

  // Drop the zero digits that pad the most significant word.
  byte *first = text;
  while (first < out && *first == '1')
    ++first;
  return slice(first, out);
}
char *to_base58::write_bytes(char *__restrict const dest, char *to_base58::write_bytes(char *__restrict const dest,
size_t dest_size) const { size_t dest_size) const {
if (MDBX_UNLIKELY(envisage_result_length() > dest_size)) if (MDBX_UNLIKELY(envisage_result_length() > dest_size))
MDBX_CXX20_UNLIKELY throw_too_small_target_buffer(); MDBX_CXX20_UNLIKELY throw_too_small_target_buffer();
auto ptr = dest; auto begin = source.byte_ptr();
auto src = source.byte_ptr(); auto end = source.end_byte_ptr();
size_t left = source.length(); line_wrapper wrapper(dest);
auto line = ptr; while (MDBX_LIKELY(begin < end) && *begin == 0) {
while (MDBX_LIKELY(left > 7)) { wrapper.put('1', wrap_width);
uint64_t v; assert(wrapper.ptr <= dest + dest_size);
std::memcpy(&v, src, 8); ++begin;
src += 8;
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
v = bswap64(v);
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#else
#error "FIXME: Unsupported byte order"
#endif /* __BYTE_ORDER__ */
ptr[10] = b58_8to11(v);
ptr[9] = b58_8to11(v);
ptr[8] = b58_8to11(v);
ptr[7] = b58_8to11(v);
ptr[6] = b58_8to11(v);
ptr[5] = b58_8to11(v);
ptr[4] = b58_8to11(v);
ptr[3] = b58_8to11(v);
ptr[2] = b58_8to11(v);
ptr[1] = b58_8to11(v);
ptr[0] = b58_8to11(v);
assert(v == 0);
ptr += 11;
left -= 8;
if (wrap_width && size_t(ptr - line) >= wrap_width && left) {
*ptr = '\n';
line = ++ptr;
}
assert(ptr <= dest + dest_size);
} }
if (left) { b58_buffer buf(end - begin, 11, 8);
uint64_t v = 0; wrapper.put(b58_encode(buf, begin, end), wrap_width);
unsigned parrots = 31; return wrapper.ptr;
do {
v = (v << 8) + *src++;
parrots += 43;
} while (--left);
auto tail = ptr += parrots >> 5;
assert(ptr <= dest + dest_size);
do {
*--tail = b58_8to11(v);
parrots -= 32;
} while (parrots > 31);
assert(v == 0);
}
return ptr;
} }
::std::ostream &to_base58::output(::std::ostream &out) const { ::std::ostream &to_base58::output(::std::ostream &out) const {
if (MDBX_LIKELY(!is_empty())) if (MDBX_LIKELY(!is_empty()))
MDBX_CXX20_LIKELY { MDBX_CXX20_LIKELY {
::std::ostream::sentry sentry(out); ::std::ostream::sentry sentry(out);
auto src = source.byte_ptr(); auto begin = source.byte_ptr();
size_t left = source.length(); auto end = source.end_byte_ptr();
unsigned width = 0; unsigned width = 0;
std::array<char, 11> buf; while (MDBX_LIKELY(begin < end) && *begin == 0) {
out.put('1');
if (wrap_width && ++width >= wrap_width) {
out << ::std::endl;
width = 0;
}
++begin;
}
while (MDBX_LIKELY(left > 7)) { b58_buffer buf(end - begin, 11, 8);
uint64_t v; const auto chunk = b58_encode(buf, begin, end);
std::memcpy(&v, src, 8); if (!wrap_width || wrap_width > width + chunk.length())
src += 8; out.write(chunk.char_ptr(), chunk.length());
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ else {
v = bswap64(v); for (size_t i = 0; i < chunk.length(); ++i) {
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ out.put(chunk.char_ptr()[i]);
#else if (wrap_width && ++width >= wrap_width) {
#error "FIXME: Unsupported byte order"
#endif /* __BYTE_ORDER__ */
buf[10] = b58_8to11(v);
buf[9] = b58_8to11(v);
buf[8] = b58_8to11(v);
buf[7] = b58_8to11(v);
buf[6] = b58_8to11(v);
buf[5] = b58_8to11(v);
buf[4] = b58_8to11(v);
buf[3] = b58_8to11(v);
buf[2] = b58_8to11(v);
buf[1] = b58_8to11(v);
buf[0] = b58_8to11(v);
assert(v == 0);
out.write(&buf.front(), 11);
left -= 8;
if (wrap_width && (width += 11) >= wrap_width && left) {
out << ::std::endl; out << ::std::endl;
width = 0; width = 0;
} }
} }
if (left) {
uint64_t v = 0;
unsigned parrots = 31;
do {
v = (v << 8) + *src++;
parrots += 43;
} while (--left);
auto ptr = buf.end();
do {
*--ptr = b58_8to11(v);
parrots -= 32;
} while (parrots > 31);
assert(v == 0);
out.write(&*ptr, buf.end() - ptr);
} }
} }
return out; return out;
@ -884,10 +901,46 @@ const signed char b58_map[256] = {
IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL // f0 IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL // f0
}; };
static inline signed char b58_11to8(uint64_t &v, const byte c) noexcept { static slice b58_decode(b58_buffer &buf, const byte *begin, const byte *end,
const signed char m = b58_map[c]; bool ignore_spaces) {
v = v * 58 + m; auto high = buf.end();
return m; while (begin < end) {
const auto c = b58_map[*begin++];
if (MDBX_LIKELY(c >= 0)) {
b58_uint carry = c;
auto ptr = buf.end();
do {
assert(ptr > buf.area);
carry += *--ptr * 58;
*ptr = carry & (~b58_uint(0) >> CHAR_BIT);
carry >>= CHAR_BIT * (sizeof(carry) - 1);
} while (carry || ptr > high);
high = ptr;
} else if (MDBX_UNLIKELY(!ignore_spaces || !isspace(begin[-1])))
MDBX_CXX20_UNLIKELY
throw std::domain_error("mdbx::from_base58:: invalid base58 string");
}
byte *output = static_cast<byte *>(static_cast<void *>(buf.area));
auto ptr = output;
for (auto porous = high; porous < buf.end(); ++porous) {
auto chunk = *porous;
static_assert(sizeof(chunk) == 4 || sizeof(chunk) == 8, "WTF?");
assert(chunk <= (~b58_uint(0) >> CHAR_BIT));
if (sizeof(chunk) > 4) {
*ptr++ = byte(uint_fast64_t(chunk) >> CHAR_BIT * 6);
*ptr++ = byte(uint_fast64_t(chunk) >> CHAR_BIT * 5);
*ptr++ = byte(uint_fast64_t(chunk) >> CHAR_BIT * 4);
*ptr++ = byte(chunk >> CHAR_BIT * 3);
}
*ptr++ = byte(chunk >> CHAR_BIT * 2);
*ptr++ = byte(chunk >> CHAR_BIT * 1);
*ptr++ = byte(chunk >> CHAR_BIT * 0);
}
while (output < ptr && *output == 0)
++output;
return slice(output, ptr);
} }
char *from_base58::write_bytes(char *__restrict const dest, char *from_base58::write_bytes(char *__restrict const dest,
@ -896,98 +949,33 @@ char *from_base58::write_bytes(char *__restrict const dest,
MDBX_CXX20_UNLIKELY throw_too_small_target_buffer(); MDBX_CXX20_UNLIKELY throw_too_small_target_buffer();
auto ptr = dest; auto ptr = dest;
auto src = source.byte_ptr(); auto begin = source.byte_ptr();
for (auto left = source.length(); left > 0;) { auto const end = source.end_byte_ptr();
if (MDBX_UNLIKELY(isspace(*src)) && ignore_spaces) { while (begin < end && *begin <= '1') {
++src; if (MDBX_LIKELY(*begin == '1'))
--left; MDBX_CXX20_LIKELY *ptr++ = 0;
continue; else if (MDBX_UNLIKELY(!ignore_spaces || !isspace(*begin)))
} MDBX_CXX20_UNLIKELY
if (MDBX_LIKELY(left > 10)) {
uint64_t v = 0;
if (MDBX_UNLIKELY((b58_11to8(v, src[0]) | b58_11to8(v, src[1]) |
b58_11to8(v, src[2]) | b58_11to8(v, src[3]) |
b58_11to8(v, src[4]) | b58_11to8(v, src[5]) |
b58_11to8(v, src[6]) | b58_11to8(v, src[7]) |
b58_11to8(v, src[8]) | b58_11to8(v, src[9]) |
b58_11to8(v, src[10])) < 0))
MDBX_CXX20_UNLIKELY goto bailout;
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
v = bswap64(v);
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#else
#error "FIXME: Unsupported byte order"
#endif /* __BYTE_ORDER__ */
std::memcpy(ptr, &v, 8);
ptr += 8;
src += 11;
left -= 11;
assert(ptr <= dest + dest_size);
continue;
}
constexpr unsigned invalid_length_mask = 1 << 1 | 1 << 4 | 1 << 8;
if (MDBX_UNLIKELY(invalid_length_mask & (1 << left)))
MDBX_CXX20_UNLIKELY goto bailout;
uint64_t v = 1;
unsigned parrots = 0;
do {
if (MDBX_UNLIKELY(b58_11to8(v, *src++) < 0))
MDBX_CXX20_UNLIKELY goto bailout;
parrots += 32;
} while (--left);
auto tail = ptr += parrots / 43;
assert(ptr <= dest + dest_size);
do {
*--tail = byte(v);
v >>= 8;
} while (v > 255);
break;
}
return ptr;
bailout:
throw std::domain_error("mdbx::from_base58:: invalid base58 string"); throw std::domain_error("mdbx::from_base58:: invalid base58 string");
++begin;
}
b58_buffer buf(end - begin, 47, 64);
auto slice = b58_decode(buf, begin, end, ignore_spaces);
memcpy(ptr, slice.data(), slice.length());
return ptr + slice.length();
} }
bool from_base58::is_erroneous() const noexcept { bool from_base58::is_erroneous() const noexcept {
bool got = false; auto begin = source.byte_ptr();
auto src = source.byte_ptr(); auto const end = source.end_byte_ptr();
for (auto left = source.length(); left > 0;) { while (begin < end) {
if (MDBX_UNLIKELY(*src <= ' ') && if (MDBX_UNLIKELY(b58_map[*begin] < 0 &&
MDBX_LIKELY(ignore_spaces && isspace(*src))) { !(ignore_spaces && isspace(*begin))))
++src; return true;
--left; ++begin;
continue;
} }
if (MDBX_LIKELY(left > 10)) {
if (MDBX_UNLIKELY((b58_map[src[0]] | b58_map[src[1]] | b58_map[src[2]] |
b58_map[src[3]] | b58_map[src[4]] | b58_map[src[5]] |
b58_map[src[6]] | b58_map[src[7]] | b58_map[src[8]] |
b58_map[src[9]] | b58_map[src[10]]) < 0))
MDBX_CXX20_UNLIKELY return true;
src += 11;
left -= 11;
got = true;
continue;
}
constexpr unsigned invalid_length_mask = 1 << 1 | 1 << 4 | 1 << 8;
if (invalid_length_mask & (1 << left))
return false; return false;
do
if (MDBX_UNLIKELY(b58_map[*src++] < 0))
MDBX_CXX20_UNLIKELY return true;
while (--left);
got = true;
break;
}
return !got;
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------