 |
Vc
1.4.2
SIMD Vector Classes for C++
|
|
48 #define Vc_ICC __INTEL_COMPILER_BUILD_DATE
57 #define Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
66 #define Vc_APPLECLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
75 #define Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
83 #define Vc_MSVC _MSC_FULL_VER
90 #ifdef __INTEL_COMPILER
91 #define Vc_ICC __INTEL_COMPILER_BUILD_DATE
92 #elif defined(__clang__) && defined(__apple_build_version__)
93 #define Vc_APPLECLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
94 #elif defined(__clang__)
95 #define Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
96 #elif defined(__GNUC__)
97 #define Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
98 #elif defined(_MSC_VER)
99 #define Vc_MSVC _MSC_FULL_VER
101 #define Vc_UNSUPPORTED_COMPILER 1
104 #if defined Vc_GCC && Vc_GCC >= 0x60000
105 #define Vc_RESET_DIAGNOSTICS _Pragma("GCC diagnostic pop")
106 #pragma GCC diagnostic push
107 #pragma GCC diagnostic ignored "-Wignored-attributes"
109 #define Vc_RESET_DIAGNOSTICS
118 #pragma warning disable 2922
121 #if __cplusplus < 201103 && (!defined Vc_MSVC || _MSC_VER < 1900)
122 # error "Vc requires support for C++11."
123 #elif __cplusplus >= 201402L
125 # if __cplusplus > 201700L
130 #if defined(__GNUC__) && !defined(Vc_NO_INLINE_ASM)
135 # if Vc_GCC >= 0x70000 && defined __i386__ && (!defined __GLIBC_PREREQ || !__GLIBC_PREREQ(2,26))
139 # elif Vc_GCC >= 0x40900
140 # define Vc_HAVE_STD_MAX_ALIGN_T 1
142 # define Vc_HAVE_MAX_ALIGN_T 1
144 #elif !defined(Vc_CLANG) && !defined(Vc_ICC)
148 # define Vc_HAVE_STD_MAX_ALIGN_T 1
151 #if defined(Vc_GCC) || defined(Vc_CLANG) || defined Vc_APPLECLANG
152 #define Vc_USE_BUILTIN_VECTOR_TYPES 1
156 # define Vc_CDECL __cdecl
157 # define Vc_VDECL __vectorcall
167 #define Scalar 0x00100000
168 #define SSE 0x00200000
169 #define SSE2 0x00300000
170 #define SSE3 0x00400000
171 #define SSSE3 0x00500000
172 #define SSE4_1 0x00600000
173 #define SSE4_2 0x00700000
174 #define AVX 0x00800000
175 #define AVX2 0x00900000
177 #define XOP 0x00000001
178 #define FMA4 0x00000002
179 #define F16C 0x00000004
180 #define POPCNT 0x00000008
181 #define SSE4a 0x00000010
182 #define FMA 0x00000020
183 #define BMI2 0x00000040
185 #define IMPL_MASK 0xFFF00000
186 #define EXT_MASK 0x000FFFFF
200 # elif defined(_M_AMD64)
211 #if defined Vc_ICC && !defined __POPCNT__
212 # if defined __SSE4_2__ || defined __SSE4A__
213 # define __POPCNT__ 1
218 #error "You are using the old VC_IMPL macro. Since Vc 1.0 all Vc macros start with Vc_, i.e. a lower-case 'c'"
223 # if defined(__AVX2__)
224 # define Vc_IMPL_AVX2 1
225 # define Vc_IMPL_AVX 1
226 # elif defined(__AVX__)
227 # define Vc_IMPL_AVX 1
229 # if defined(__SSE4_2__)
230 # define Vc_IMPL_SSE 1
231 # define Vc_IMPL_SSE4_2 1
233 # if defined(__SSE4_1__)
234 # define Vc_IMPL_SSE 1
235 # define Vc_IMPL_SSE4_1 1
237 # if defined(__SSE3__)
238 # define Vc_IMPL_SSE 1
239 # define Vc_IMPL_SSE3 1
241 # if defined(__SSSE3__)
242 # define Vc_IMPL_SSE 1
243 # define Vc_IMPL_SSSE3 1
245 # if defined(__SSE2__)
246 # define Vc_IMPL_SSE 1
247 # define Vc_IMPL_SSE2 1
250 # if defined(Vc_IMPL_SSE)
253 # define Vc_IMPL_Scalar 1
256 # if !defined(Vc_IMPL_Scalar)
258 # define Vc_IMPL_FMA4 1
261 # define Vc_IMPL_XOP 1
264 # define Vc_IMPL_F16C 1
267 # define Vc_IMPL_POPCNT 1
270 # define Vc_IMPL_SSE4a 1
273 # define Vc_IMPL_FMA 1
276 # define Vc_IMPL_BMI2 1
282 # if (Vc_IMPL & IMPL_MASK) == AVX2 // AVX2 supersedes SSE
283 # define Vc_IMPL_AVX2 1
284 # define Vc_IMPL_AVX 1
285 # elif (Vc_IMPL & IMPL_MASK) == AVX // AVX supersedes SSE
286 # define Vc_IMPL_AVX 1
287 # elif (Vc_IMPL & IMPL_MASK) == Scalar
288 # define Vc_IMPL_Scalar 1
289 # elif (Vc_IMPL & IMPL_MASK) == SSE4_2
290 # define Vc_IMPL_SSE4_2 1
291 # define Vc_IMPL_SSE4_1 1
292 # define Vc_IMPL_SSSE3 1
293 # define Vc_IMPL_SSE3 1
294 # define Vc_IMPL_SSE2 1
295 # define Vc_IMPL_SSE 1
296 # elif (Vc_IMPL & IMPL_MASK) == SSE4_1
297 # define Vc_IMPL_SSE4_1 1
298 # define Vc_IMPL_SSSE3 1
299 # define Vc_IMPL_SSE3 1
300 # define Vc_IMPL_SSE2 1
301 # define Vc_IMPL_SSE 1
302 # elif (Vc_IMPL & IMPL_MASK) == SSSE3
303 # define Vc_IMPL_SSSE3 1
304 # define Vc_IMPL_SSE3 1
305 # define Vc_IMPL_SSE2 1
306 # define Vc_IMPL_SSE 1
307 # elif (Vc_IMPL & IMPL_MASK) == SSE3
308 # define Vc_IMPL_SSE3 1
309 # define Vc_IMPL_SSE2 1
310 # define Vc_IMPL_SSE 1
311 # elif (Vc_IMPL & IMPL_MASK) == SSE2
312 # define Vc_IMPL_SSE2 1
313 # define Vc_IMPL_SSE 1
314 # elif (Vc_IMPL & IMPL_MASK) == SSE
315 # define Vc_IMPL_SSE 1
316 # if defined(__SSE4_2__)
317 # define Vc_IMPL_SSE4_2 1
319 # if defined(__SSE4_1__)
320 # define Vc_IMPL_SSE4_1 1
322 # if defined(__SSE3__)
323 # define Vc_IMPL_SSE3 1
325 # if defined(__SSSE3__)
326 # define Vc_IMPL_SSSE3 1
328 # if defined(__SSE2__)
329 # define Vc_IMPL_SSE2 1
331 # elif (Vc_IMPL & IMPL_MASK) == 0 && (Vc_IMPL & SSE4a)
334 # define Vc_IMPL_SSE3 1
335 # define Vc_IMPL_SSE2 1
336 # define Vc_IMPL_SSE 1
339 # define Vc_IMPL_XOP 1
341 # if (Vc_IMPL & FMA4)
342 # define Vc_IMPL_FMA4 1
344 # if (Vc_IMPL & F16C)
345 # define Vc_IMPL_F16C 1
347 # if (!defined(Vc_IMPL_Scalar) && defined(__POPCNT__)) || (Vc_IMPL & POPCNT)
348 # define Vc_IMPL_POPCNT 1
350 # if (Vc_IMPL & SSE4a)
351 # define Vc_IMPL_SSE4a 1
354 # define Vc_IMPL_FMA 1
356 # if (Vc_IMPL & BMI2)
357 # define Vc_IMPL_BMI2 1
365 # define Vc_USE_VEX_CODING 1
370 # define Vc_IMPL_SSE4_2 1
371 # define Vc_IMPL_SSE4_1 1
372 # define Vc_IMPL_SSSE3 1
373 # define Vc_IMPL_SSE3 1
374 # define Vc_IMPL_SSE2 1
375 # define Vc_IMPL_SSE 1
378 #if defined(Vc_CLANG) && Vc_CLANG >= 0x30600 && Vc_CLANG < 0x30700
379 # if defined(Vc_IMPL_AVX)
380 # warning "clang 3.6.x miscompiles AVX code, frequently losing 50% of the data. Vc will fall back to SSE4 instead."
382 # if defined(Vc_IMPL_AVX2)
388 # if !defined(Vc_IMPL_Scalar) && !defined(Vc_IMPL_SSE) && !defined(Vc_IMPL_AVX)
389 # error "No suitable Vc implementation was selected! Probably Vc_IMPL was set to an invalid value."
390 # elif defined(Vc_IMPL_SSE) && !defined(Vc_IMPL_SSE2)
391 # error "SSE requested but no SSE2 support. Vc needs at least SSE2!"
415 #if defined Vc_IMPL_AVX2
416 #define Vc_DEFAULT_IMPL_AVX2
417 #elif defined Vc_IMPL_AVX
418 #define Vc_DEFAULT_IMPL_AVX
419 #elif defined Vc_IMPL_SSE
420 #define Vc_DEFAULT_IMPL_SSE
421 #elif defined Vc_IMPL_Scalar
422 #define Vc_DEFAULT_IMPL_Scalar
424 #error "Preprocessor logic broken. Please report a bug."
429 namespace Vc_VERSIONED_NAMESPACE
432 typedef signed char int8_t;
433 typedef unsigned char uint8_t;
434 typedef signed short int16_t;
435 typedef unsigned short uint16_t;
436 typedef signed int int32_t;
437 typedef unsigned int uint32_t;
438 typedef signed long long int64_t;
439 typedef unsigned long long uint64_t;
496 ImplementationMask = 0xfff
529 ExtraInstructionsMask = 0xfffff000u
545 return static_cast<Implementation>(Features & ImplementationMask);
550 return static_cast<unsigned int>(impl) == current();
558 return static_cast<unsigned int>(low) <= current() &&
559 static_cast<unsigned int>(high) >= current();
564 static constexpr
bool runs_on(
unsigned int extraInstructions)
566 return (extraInstructions & Features & ExtraInstructionsMask) ==
567 (Features & ExtraInstructionsMask);
577 #ifdef Vc_IMPL_Scalar
579 #elif defined(Vc_IMPL_AVX2)
581 #elif defined(Vc_IMPL_AVX)
583 #elif defined(Vc_IMPL_SSE4_2)
585 #elif defined(Vc_IMPL_SSE4_1)
587 #elif defined(Vc_IMPL_SSSE3)
589 #elif defined(Vc_IMPL_SSE3)
591 #elif defined(Vc_IMPL_SSE2)
603 #ifdef Vc_IMPL_POPCNT
612 #ifdef Vc_USE_VEX_CODING
621 #endif // VC_GLOBAL_H_
@ FmaInstructions
Support for FMA instructions (3 operand variant)
@ AlignOnCacheline
Align on boundary of cache line sizes (e.g. 64 Bytes on x86).
@ Bmi2Instructions
Support for BMI2 instructions.
@ Sse4aInstructions
Support for SSE4a instructions.
@ SSE42Impl
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2
@ Float16cInstructions
Support for float16 conversions in hardware.
static constexpr Implementation current()
Returns the currently used Vc::Implementation.
@ PopcntInstructions
Support for the population count instruction.
@ SSSE3Impl
x86 SSE + SSE2 + SSE3 + SSSE3
@ SSE3Impl
x86 SSE + SSE2 + SSE3
@ AlignOnPage
Align on boundary of page sizes (e.g. 4096 Bytes on x86).
static constexpr bool is_between(Implementation low, Implementation high)
Returns whether the current Vc::Implementation implements at least low and at most high.
@ AlignOnVector
Align on boundary of vector sizes (e.g. 16 Bytes on SSE platforms).
@ SSE41Impl
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1
ImplementationT< > CurrentImplementation
static constexpr bool is(Implementation impl)
Returns whether impl is the current Vc::Implementation.
@ XopInstructions
Support for XOP instructions.
@ Fma4Instructions
Support for FMA4 instructions.
@ VexInstructions
Support for ternary instruction coding (VEX)
@ ScalarImpl
uses only fundamental types
static constexpr bool runs_on(unsigned int extraInstructions)
Returns whether the current code would run on a CPU providing extraInstructions.