mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-21 18:58:21 +08:00
mdbx: костыль для GCC при сборке с -m32 -arch=i686 -Ofast
.
Обходное решение проблем сборки посредством GCC с использование опций `-m32 -arch=i686 -Ofast`. Проблема обусловлена ошибкой GCC, из-за которой конструкция `__attribute__((__target__("sse2")))` не включает полноценное использование инструкций SSE и SSE2, если это не было сделано посредством опций командной строки, но была использована опция `-Ofast`. В результате сборка заканчивалась сообщением об ошибке: gcc/i686-buildroot-linux-gnu/12.2.0/include/xmmintrin.h: In function 'diffcmp2mask_sse2': gcc/i686-buildroot-linux-gnu/12.2.0/include/xmmintrin.h:814:1: error: inlining failed in call to 'always_inline' '_mm_movemask_ps': target specific option mismatch 814 | _mm_movemask_ps (__m128 __A)
This commit is contained in:
parent
5561cec9c5
commit
d02bdcf2bd
45
src/core.c
45
src/core.c
@ -6472,27 +6472,47 @@ MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clzl(size_t value) {
|
|||||||
#define MDBX_ATTRIBUTE_TARGET(target) __attribute__((__target__(target)))
|
#define MDBX_ATTRIBUTE_TARGET(target) __attribute__((__target__(target)))
|
||||||
#endif /* MDBX_ATTRIBUTE_TARGET */
|
#endif /* MDBX_ATTRIBUTE_TARGET */
|
||||||
|
|
||||||
#if defined(__SSE2__)
|
#ifndef MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND
|
||||||
|
/* Workaround for GCC's bug with `-m32 -march=i686 -Ofast`
|
||||||
|
* gcc/i686-buildroot-linux-gnu/12.2.0/include/xmmintrin.h:814:1:
|
||||||
|
* error: inlining failed in call to 'always_inline' '_mm_movemask_ps':
|
||||||
|
* target specific option mismatch */
|
||||||
|
#if !defined(__FAST_MATH__) || !__FAST_MATH__ || !defined(__GNUC__) || \
|
||||||
|
defined(__e2k__) || defined(__clang__) || defined(__amd64__) || \
|
||||||
|
defined(__SSE2__)
|
||||||
|
#define MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND 0
|
||||||
|
#else
|
||||||
|
#define MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND 1
|
||||||
|
#endif
|
||||||
|
#endif /* MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND */
|
||||||
|
|
||||||
|
#if defined(__SSE2__) && defined(__SSE__)
|
||||||
#define MDBX_ATTRIBUTE_TARGET_SSE2 /* nope */
|
#define MDBX_ATTRIBUTE_TARGET_SSE2 /* nope */
|
||||||
#elif (defined(_M_IX86_FP) && _M_IX86_FP >= 2) || defined(__amd64__)
|
#elif (defined(_M_IX86_FP) && _M_IX86_FP >= 2) || defined(__amd64__)
|
||||||
#define __SSE2__
|
#define __SSE2__
|
||||||
#define MDBX_ATTRIBUTE_TARGET_SSE2 /* nope */
|
#define MDBX_ATTRIBUTE_TARGET_SSE2 /* nope */
|
||||||
#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__)
|
#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \
|
||||||
#define MDBX_ATTRIBUTE_TARGET_SSE2 MDBX_ATTRIBUTE_TARGET("sse2")
|
!MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND
|
||||||
|
#define MDBX_ATTRIBUTE_TARGET_SSE2 MDBX_ATTRIBUTE_TARGET("sse,sse2")
|
||||||
#endif /* __SSE2__ */
|
#endif /* __SSE2__ */
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
#define MDBX_ATTRIBUTE_TARGET_AVX2 /* nope */
|
#define MDBX_ATTRIBUTE_TARGET_AVX2 /* nope */
|
||||||
#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__)
|
#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \
|
||||||
#define MDBX_ATTRIBUTE_TARGET_AVX2 MDBX_ATTRIBUTE_TARGET("avx2")
|
!MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND
|
||||||
|
#define MDBX_ATTRIBUTE_TARGET_AVX2 MDBX_ATTRIBUTE_TARGET("sse,sse2,avx,avx2")
|
||||||
#endif /* __AVX2__ */
|
#endif /* __AVX2__ */
|
||||||
|
|
||||||
|
#if defined(MDBX_ATTRIBUTE_TARGET_AVX2)
|
||||||
#if defined(__AVX512BW__)
|
#if defined(__AVX512BW__)
|
||||||
#define MDBX_ATTRIBUTE_TARGET_AVX512BW /* nope */
|
#define MDBX_ATTRIBUTE_TARGET_AVX512BW /* nope */
|
||||||
#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \
|
#elif defined(MDBX_ATTRIBUTE_TARGET) && defined(__ia32__) && \
|
||||||
|
!MDBX_GCC_FASTMATH_i686_SIMD_WORKAROUND && \
|
||||||
(__GNUC_PREREQ(6, 0) || __CLANG_PREREQ(5, 0))
|
(__GNUC_PREREQ(6, 0) || __CLANG_PREREQ(5, 0))
|
||||||
#define MDBX_ATTRIBUTE_TARGET_AVX512BW MDBX_ATTRIBUTE_TARGET("avx512bw")
|
#define MDBX_ATTRIBUTE_TARGET_AVX512BW \
|
||||||
|
MDBX_ATTRIBUTE_TARGET("sse,sse2,avx,avx2,avx512bw")
|
||||||
#endif /* __AVX512BW__ */
|
#endif /* __AVX512BW__ */
|
||||||
|
#endif /* MDBX_ATTRIBUTE_TARGET_AVX2 for MDBX_ATTRIBUTE_TARGET_AVX512BW */
|
||||||
|
|
||||||
#ifdef MDBX_ATTRIBUTE_TARGET_SSE2
|
#ifdef MDBX_ATTRIBUTE_TARGET_SSE2
|
||||||
MDBX_ATTRIBUTE_TARGET_SSE2 static __always_inline unsigned
|
MDBX_ATTRIBUTE_TARGET_SSE2 static __always_inline unsigned
|
||||||
@ -6566,6 +6586,15 @@ diffcmp2mask_avx2(const pgno_t *const ptr, const ptrdiff_t offset,
|
|||||||
return _mm256_movemask_ps(*(const __m256 *)&cmp);
|
return _mm256_movemask_ps(*(const __m256 *)&cmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MDBX_ATTRIBUTE_TARGET_AVX2 static __always_inline unsigned
|
||||||
|
diffcmp2mask_sse2avx(const pgno_t *const ptr, const ptrdiff_t offset,
|
||||||
|
const __m128i pattern) {
|
||||||
|
const __m128i f = _mm_loadu_si128((const __m128i *)ptr);
|
||||||
|
const __m128i l = _mm_loadu_si128((const __m128i *)(ptr + offset));
|
||||||
|
const __m128i cmp = _mm_cmpeq_epi32(_mm_sub_epi32(f, l), pattern);
|
||||||
|
return _mm_movemask_ps(*(const __m128 *)&cmp);
|
||||||
|
}
|
||||||
|
|
||||||
MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_AVX2 static pgno_t *
|
MDBX_MAYBE_UNUSED __hot MDBX_ATTRIBUTE_TARGET_AVX2 static pgno_t *
|
||||||
scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) {
|
scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) {
|
||||||
assert(seq > 0 && len > seq);
|
assert(seq > 0 && len > seq);
|
||||||
@ -6611,7 +6640,7 @@ scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) {
|
|||||||
}
|
}
|
||||||
#endif /* __SANITIZE_ADDRESS__ */
|
#endif /* __SANITIZE_ADDRESS__ */
|
||||||
if (range - 3 > detent) {
|
if (range - 3 > detent) {
|
||||||
mask = diffcmp2mask_sse2(range - 3, offset, *(const __m128i *)&pattern);
|
mask = diffcmp2mask_sse2avx(range - 3, offset, *(const __m128i *)&pattern);
|
||||||
if (mask)
|
if (mask)
|
||||||
return range + 28 - __builtin_clz(mask);
|
return range + 28 - __builtin_clz(mask);
|
||||||
range -= 4;
|
range -= 4;
|
||||||
@ -6685,7 +6714,7 @@ scan4seq_avx512bw(pgno_t *range, const size_t len, const size_t seq) {
|
|||||||
range -= 8;
|
range -= 8;
|
||||||
}
|
}
|
||||||
if (range - 3 > detent) {
|
if (range - 3 > detent) {
|
||||||
mask = diffcmp2mask_sse2(range - 3, offset, *(const __m128i *)&pattern);
|
mask = diffcmp2mask_sse2avx(range - 3, offset, *(const __m128i *)&pattern);
|
||||||
if (mask)
|
if (mask)
|
||||||
return range + 28 - __builtin_clz(mask);
|
return range + 28 - __builtin_clz(mask);
|
||||||
range -= 4;
|
range -= 4;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user