mdbx: rework/fix unaligned access.

Change-Id: Ib0208e78786ac84551384ed57ac580fe0717840e
This commit is contained in:
Leonid Yuriev 2019-10-23 23:46:12 +03:00
parent b7d27c1b36
commit 81fd0beb1a
5 changed files with 620 additions and 548 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1273,11 +1273,13 @@ typedef struct MDBX_node {
uint32_t mn_dsize; uint32_t mn_dsize;
uint32_t mn_pgno32; uint32_t mn_pgno32;
}; };
uint16_t mn_flags; /* see mdbx_node */ uint8_t mn_flags; /* see mdbx_node flags */
uint8_t mn_extra;
uint16_t mn_ksize; /* key size */ uint16_t mn_ksize; /* key size */
#else #else
uint16_t mn_ksize; /* key size */ uint16_t mn_ksize; /* key size */
uint16_t mn_flags; /* see mdbx_node */ uint8_t mn_extra;
uint8_t mn_flags; /* see mdbx_node flags */
union { union {
uint32_t mn_pgno32; uint32_t mn_pgno32;
uint32_t mn_dsize; uint32_t mn_dsize;

View File

@ -267,8 +267,11 @@ typedef pthread_mutex_t mdbx_fastmutex_t;
#endif /* all x86 */ #endif /* all x86 */
#if !defined(MDBX_UNALIGNED_OK) #if !defined(MDBX_UNALIGNED_OK)
#if (defined(__ia32__) || defined(__e2k__) || \ #if defined(_MSC_VER)
defined(__ARM_FEATURE_UNALIGNED)) && \ #define MDBX_UNALIGNED_OK 1 /* avoid MSVC misoptimization */
#elif __CLANG_PREREQ(5, 0) || __GNUC_PREREQ(5, 0)
#define MDBX_UNALIGNED_OK 0 /* expecting optimization is well done */
#elif (defined(__ia32__) || defined(__ARM_FEATURE_UNALIGNED)) && \
!defined(__ALIGNED__) !defined(__ALIGNED__)
#define MDBX_UNALIGNED_OK 1 #define MDBX_UNALIGNED_OK 1
#else #else

View File

@ -408,93 +408,97 @@ static int handle_freedb(const uint64_t record_number, const MDBX_val *key,
const MDBX_val *data) { const MDBX_val *data) {
char *bad = ""; char *bad = "";
pgno_t *iptr = data->iov_base; pgno_t *iptr = data->iov_base;
txnid_t txnid = *(txnid_t *)key->iov_base;
if (key->iov_len != sizeof(txnid_t)) if (key->iov_len != sizeof(txnid_t))
problem_add("entry", record_number, "wrong txn-id size", problem_add("entry", record_number, "wrong txn-id size",
"key-size %" PRIiPTR, key->iov_len); "key-size %" PRIiPTR, key->iov_len);
else if (txnid < 1 || txnid > envinfo.mi_recent_txnid)
problem_add("entry", record_number, "wrong txn-id", "%" PRIaTXN, txnid);
else { else {
if (data->iov_len < sizeof(pgno_t) || data->iov_len % sizeof(pgno_t)) txnid_t txnid;
problem_add("entry", txnid, "wrong idl size", "%" PRIuPTR, data->iov_len); memcpy(&txnid, key->iov_base, sizeof(txnid));
size_t number = (data->iov_len >= sizeof(pgno_t)) ? *iptr++ : 0; if (txnid < 1 || txnid > envinfo.mi_recent_txnid)
if (number < 1 || number > MDBX_PNL_MAX) problem_add("entry", record_number, "wrong txn-id", "%" PRIaTXN, txnid);
problem_add("entry", txnid, "wrong idl length", "%" PRIuPTR, number); else {
else if ((number + 1) * sizeof(pgno_t) > data->iov_len) { if (data->iov_len < sizeof(pgno_t) || data->iov_len % sizeof(pgno_t))
problem_add("entry", txnid, "trimmed idl", problem_add("entry", txnid, "wrong idl size", "%" PRIuPTR,
"%" PRIuSIZE " > %" PRIuSIZE " (corruption)", data->iov_len);
(number + 1) * sizeof(pgno_t), data->iov_len); size_t number = (data->iov_len >= sizeof(pgno_t)) ? *iptr++ : 0;
number = data->iov_len / sizeof(pgno_t) - 1; if (number < 1 || number > MDBX_PNL_MAX)
} else if (data->iov_len - (number + 1) * sizeof(pgno_t) >= problem_add("entry", txnid, "wrong idl length", "%" PRIuPTR, number);
/* LY: allow gap up to one page. it is ok else if ((number + 1) * sizeof(pgno_t) > data->iov_len) {
* and better than shrink-and-retry inside mdbx_update_gc() */ problem_add("entry", txnid, "trimmed idl",
envstat.ms_psize) "%" PRIuSIZE " > %" PRIuSIZE " (corruption)",
problem_add("entry", txnid, "extra idl space", (number + 1) * sizeof(pgno_t), data->iov_len);
"%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)", number = data->iov_len / sizeof(pgno_t) - 1;
(number + 1) * sizeof(pgno_t), data->iov_len); } else if (data->iov_len - (number + 1) * sizeof(pgno_t) >=
/* LY: allow gap up to one page. it is ok
* and better than shrink-and-retry inside mdbx_update_gc() */
envstat.ms_psize)
problem_add("entry", txnid, "extra idl space",
"%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)",
(number + 1) * sizeof(pgno_t), data->iov_len);
gc_pages += number; gc_pages += number;
if (envinfo.mi_latter_reader_txnid > txnid) if (envinfo.mi_latter_reader_txnid > txnid)
reclaimable_pages += number; reclaimable_pages += number;
pgno_t prev = MDBX_PNL_ASCENDING ? NUM_METAS - 1 : txn->mt_next_pgno; pgno_t prev = MDBX_PNL_ASCENDING ? NUM_METAS - 1 : txn->mt_next_pgno;
pgno_t span = 1; pgno_t span = 1;
for (unsigned i = 0; i < number; ++i) { for (unsigned i = 0; i < number; ++i) {
const pgno_t pgno = iptr[i]; const pgno_t pgno = iptr[i];
if (pgno < NUM_METAS) if (pgno < NUM_METAS)
problem_add("entry", txnid, "wrong idl entry", problem_add("entry", txnid, "wrong idl entry",
"pgno %" PRIaPGNO " < meta-pages %u", pgno, NUM_METAS); "pgno %" PRIaPGNO " < meta-pages %u", pgno, NUM_METAS);
else if (pgno >= backed_pages) else if (pgno >= backed_pages)
problem_add("entry", txnid, "wrong idl entry", problem_add("entry", txnid, "wrong idl entry",
"pgno %" PRIaPGNO " > backed-pages %" PRIu64, pgno, "pgno %" PRIaPGNO " > backed-pages %" PRIu64, pgno,
backed_pages); backed_pages);
else if (pgno >= alloc_pages) else if (pgno >= alloc_pages)
problem_add("entry", txnid, "wrong idl entry", problem_add("entry", txnid, "wrong idl entry",
"pgno %" PRIaPGNO " > alloc-pages %" PRIu64, pgno, "pgno %" PRIaPGNO " > alloc-pages %" PRIu64, pgno,
alloc_pages - 1); alloc_pages - 1);
else { else {
if (MDBX_PNL_DISORDERED(prev, pgno)) { if (MDBX_PNL_DISORDERED(prev, pgno)) {
bad = " [bad sequence]"; bad = " [bad sequence]";
problem_add("entry", txnid, "bad sequence", problem_add("entry", txnid, "bad sequence",
"%" PRIaPGNO " %c [%u].%" PRIaPGNO, prev, "%" PRIaPGNO " %c [%u].%" PRIaPGNO, prev,
(prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? '>' : '<'), (prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? '>' : '<'),
i, pgno); i, pgno);
}
if (walk.pagemap) {
int idx = walk.pagemap[pgno];
if (idx == 0)
walk.pagemap[pgno] = -1;
else if (idx > 0)
problem_add("page", pgno, "already used", "by %s",
walk.dbi[idx - 1].name);
else
problem_add("page", pgno, "already listed in GC", nullptr);
}
} }
if (walk.pagemap) { prev = pgno;
int idx = walk.pagemap[pgno]; while (i + span < number &&
if (idx == 0)
walk.pagemap[pgno] = -1;
else if (idx > 0)
problem_add("page", pgno, "already used", "by %s",
walk.dbi[idx - 1].name);
else
problem_add("page", pgno, "already listed in GC", nullptr);
}
}
prev = pgno;
while (i + span < number &&
iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span)
: pgno_sub(pgno, span)))
++span;
}
if (verbose > 3 && !only_subdb) {
print(" transaction %" PRIaTXN ", %" PRIuPTR
" pages, maxspan %" PRIaPGNO "%s\n",
txnid, number, span, bad);
if (verbose > 4) {
for (unsigned i = 0; i < number; i += span) {
const pgno_t pgno = iptr[i];
for (span = 1;
i + span < number &&
iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span)
: pgno_sub(pgno, span)); : pgno_sub(pgno, span)))
++span) ++span;
; }
if (span > 1) { if (verbose > 3 && !only_subdb) {
print(" %9" PRIaPGNO "[%" PRIaPGNO "]\n", pgno, span); print(" transaction %" PRIaTXN ", %" PRIuPTR
} else " pages, maxspan %" PRIaPGNO "%s\n",
print(" %9" PRIaPGNO "\n", pgno); txnid, number, span, bad);
if (verbose > 4) {
for (unsigned i = 0; i < number; i += span) {
const pgno_t pgno = iptr[i];
for (span = 1;
i + span < number &&
iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span)
: pgno_sub(pgno, span));
++span)
;
if (span > 1) {
print(" %9" PRIaPGNO "[%" PRIaPGNO "]\n", pgno, span);
} else
print(" %9" PRIaPGNO "\n", pgno);
}
} }
} }
} }

View File

@ -124,8 +124,6 @@ inline bool parse_option_intptr(int argc, char *const argv[], int &narg,
#pragma pack(push, 1) #pragma pack(push, 1)
struct keygen_params_pod { struct keygen_params_pod {
keygen_case keycase;
/* Параметры генератора пар key-value. /* Параметры генератора пар key-value.
* *
* Ключи и значения генерируются по задаваемым параметрам на основе "плоской" * Ключи и значения генерируются по задаваемым параметрам на основе "плоской"
@ -232,11 +230,10 @@ struct keygen_params_pod {
uint8_t split; uint8_t split;
uint32_t seed; uint32_t seed;
uint64_t offset; uint64_t offset;
keygen_case keycase;
}; };
struct actor_params_pod { struct actor_params_pod {
unsigned loglevel;
unsigned mode_flags; unsigned mode_flags;
unsigned table_flags; unsigned table_flags;
intptr_t size_lower; intptr_t size_lower;
@ -265,6 +262,7 @@ struct actor_params_pod {
unsigned max_tables; unsigned max_tables;
keygen_params_pod keygen; keygen_params_pod keygen;
uint8_t loglevel;
bool drop_table; bool drop_table;
bool ignore_dbfull; bool ignore_dbfull;
bool speculum; bool speculum;