mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-20 05:18:21 +08:00
mdbx++: is_printable() for UTF8.
Change-Id: I96a58dc97e2f4261eafceb358a1dca7ba55cfa0d
This commit is contained in:
parent
ee902621db
commit
62be36cc9e
2
mdbx.h++
2
mdbx.h++
@ -539,7 +539,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val {
|
|||||||
base64_decode(const ALLOCATOR &allocator = ALLOCATOR()) const;
|
base64_decode(const ALLOCATOR &allocator = ALLOCATOR()) const;
|
||||||
|
|
||||||
__nothrow_pure_function bool
|
__nothrow_pure_function bool
|
||||||
is_printable(bool allow_utf8 = true) const noexcept;
|
is_printable(bool disable_utf8 = false) const noexcept;
|
||||||
__nothrow_pure_function bool
|
__nothrow_pure_function bool
|
||||||
is_hex(bool ignore_spaces = false) const noexcept;
|
is_hex(bool ignore_spaces = false) const noexcept;
|
||||||
__nothrow_pure_function bool
|
__nothrow_pure_function bool
|
||||||
|
106
src/mdbx.c++
106
src/mdbx.c++
@ -440,15 +440,109 @@ __cold void error::throw_exception() const {
|
|||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
bool slice::is_printable(bool allow_utf8) const noexcept {
|
bool slice::is_printable(bool disable_utf8) const noexcept {
|
||||||
if (mdbx_unlikely(allow_utf8)) {
|
enum : byte {
|
||||||
/* FIXME */ NOT_IMPLEMENTED();
|
LS = 5, // shift for UTF8 sequence length
|
||||||
}
|
P_ = 1 << (LS - 1), // printable ASCII flag
|
||||||
|
N_ = 0, // non-printable ASCII
|
||||||
|
second_range_mask = P_ - 1, // mask for range flag
|
||||||
|
r80_BF = P_ | 0, // flag for UTF8 2nd byte range
|
||||||
|
rA0_BF = P_ | 1, // flag for UTF8 2nd byte range
|
||||||
|
r80_9F = P_ | 2, // flag for UTF8 2nd byte range
|
||||||
|
r90_BF = P_ | 3, // flag for UTF8 2nd byte range
|
||||||
|
r80_8F = P_ | 4, // flag for UTF8 2nd byte range
|
||||||
|
|
||||||
|
// valid utf-8 byte sequences
|
||||||
|
// http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
|
||||||
|
// Code | Bytes | | |
|
||||||
|
// Points | 1st | 2nd | 3rd |4th
|
||||||
|
// --------------------|--------|--------|--------|---
|
||||||
|
C2 = 2 << LS | r80_BF, // U+000080..U+0007FF | C2..DF | 80..BF | |
|
||||||
|
E0 = 3 << LS | rA0_BF, // U+000800..U+000FFF | E0 | A0..BF | 80..BF |
|
||||||
|
E1 = 3 << LS | r80_BF, // U+001000..U+00CFFF | E1..EC | 80..BF | 80..BF |
|
||||||
|
ED = 3 << LS | r80_9F, // U+00D000..U+00D7FF | ED | 80..9F | 80..BF |
|
||||||
|
EE = 3 << LS | r80_BF, // U+00E000..U+00FFFF | EE..EF | 80..BF | 80..BF |
|
||||||
|
F0 = 4 << LS | r90_BF, // U+010000..U+03FFFF | F0 | 90..BF | 80..BF |...
|
||||||
|
F1 = 4 << LS | r80_BF, // U+040000..U+0FFFFF | F1..F3 | 80..BF | 80..BF |...
|
||||||
|
F4 = 4 << LS | r80_BF, // U+100000..U+10FFFF | F4 | 80..8F | 80..BF |...
|
||||||
|
};
|
||||||
|
|
||||||
|
static const byte range_from[] = {0x80, 0xA0, 0x80, 0x90, 0x80};
|
||||||
|
static const byte range_to[] = {0xBF, 0xBF, 0x9F, 0xBF, 0x8F};
|
||||||
|
|
||||||
|
static const byte map[256] = {
|
||||||
|
// 1 2 3 4 5 6 7 8 9 a b c d e f
|
||||||
|
N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, // 00
|
||||||
|
N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, // 10
|
||||||
|
P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, // 20
|
||||||
|
P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, // 30
|
||||||
|
P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, // 40
|
||||||
|
P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, // 50
|
||||||
|
P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, // 60
|
||||||
|
P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, N_, // 70
|
||||||
|
N_, N_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, N_, P_, N_, // 80
|
||||||
|
N_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, N_, P_, P_, // 90
|
||||||
|
P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, // a0
|
||||||
|
P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, // b0
|
||||||
|
P_, P_, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, // c0
|
||||||
|
C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, // df
|
||||||
|
E0, E1, E1, E1, E1, E1, E1, E1, E1, E1, E1, E1, E1, ED, EE, EE, // e0
|
||||||
|
F0, F1, F1, F1, F4, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_, P_ // f0
|
||||||
|
};
|
||||||
|
|
||||||
|
if (length() < 1)
|
||||||
|
return false;
|
||||||
|
|
||||||
auto src = byte_ptr();
|
auto src = byte_ptr();
|
||||||
for (const auto end = src + size(); src != end; ++src)
|
const auto end = src + length();
|
||||||
if (mdbx_unlikely(!isprint(*src)))
|
if (mdbx_unlikely(disable_utf8)) {
|
||||||
|
do
|
||||||
|
if (mdbx_unlikely((P_ & map[*src]) == 0))
|
||||||
return false;
|
return false;
|
||||||
|
while (++src < end);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
do {
|
||||||
|
const auto bits = map[*src];
|
||||||
|
const auto second_from = range_from[bits & second_range_mask];
|
||||||
|
const auto second_to = range_to[bits & second_range_mask];
|
||||||
|
switch (bits >> LS) {
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
case 1:
|
||||||
|
src += 1;
|
||||||
|
continue;
|
||||||
|
case 2:
|
||||||
|
if (unlikely(src + 1 >= end))
|
||||||
|
return false;
|
||||||
|
if (unlikely(src[1] < second_from || src[1] > second_to))
|
||||||
|
return false;
|
||||||
|
src += 2;
|
||||||
|
continue;
|
||||||
|
case 3:
|
||||||
|
if (unlikely(src + 3 >= end))
|
||||||
|
return false;
|
||||||
|
if (unlikely(src[1] < second_from || src[1] > second_to))
|
||||||
|
return false;
|
||||||
|
if (unlikely(src[2] < 0x80 || src[2] > 0xBF))
|
||||||
|
return false;
|
||||||
|
src += 3;
|
||||||
|
continue;
|
||||||
|
case 4:
|
||||||
|
if (unlikely(src + 4 >= end))
|
||||||
|
return false;
|
||||||
|
if (unlikely(src[1] < second_from || src[1] > second_to))
|
||||||
|
return false;
|
||||||
|
if (unlikely(src[2] < 0x80 || src[2] > 0xBF))
|
||||||
|
return false;
|
||||||
|
if (unlikely(src[3] < 0x80 || src[3] > 0xBF))
|
||||||
|
return false;
|
||||||
|
src += 4;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} while (src < end);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user