Vc  1.4.2
SIMD Vector Classes for C++
global.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 #ifndef VC_GLOBAL_H_
29 #define VC_GLOBAL_H_
30 
31 #include <cstdint>
32 #include "fwddecl.h"
33 
34 #ifdef DOXYGEN
35 
// Compiler identification macros as presented to Doxygen.
//
// In this documentation-only branch each macro is defined so that it can be documented.
// All of them except Vc_GCC are removed again right away.

// Identifies the Intel compiler (value: __INTEL_COMPILER_BUILD_DATE).
#define Vc_ICC __INTEL_COMPILER_BUILD_DATE
#undef Vc_ICC

// Identifies upstream Clang. The version is encoded as
// major * 0x10000 + minor * 0x100 + patchlevel.
#define Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
#undef Vc_CLANG

// Identifies Apple's Clang. Same version encoding as Vc_CLANG.
#define Vc_APPLECLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
#undef Vc_APPLECLANG

// Identifies GCC. The version is encoded as
// major * 0x10000 + minor * 0x100 + patchlevel (e.g. 0x60300 for GCC 6.3.0).
// NOTE(review): Vc_GCC is the only macro in this branch that is not #undef'ed again -
// presumably so that the documentation is generated as for a GCC build; confirm.
#define Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)

// Identifies Microsoft Visual C++ (value: _MSC_FULL_VER).
#define Vc_MSVC _MSC_FULL_VER
#undef Vc_MSVC
85 
86 
87 #else // DOXYGEN
88 
// Compiler defines
//
// Exactly one of Vc_ICC, Vc_APPLECLANG, Vc_CLANG, Vc_GCC, Vc_MSVC or
// Vc_UNSUPPORTED_COMPILER is defined by this chain. The Clang and GCC macros encode the
// version as major * 0x10000 + minor * 0x100 + patchlevel so that it can be compared in
// #if expressions.
// NOTE(review): the order looks deliberate - ICC and Clang are tested before __GNUC__,
// presumably because they define __GNUC__ as well; confirm before reordering.
#ifdef __INTEL_COMPILER
#  define Vc_ICC __INTEL_COMPILER_BUILD_DATE
#elif defined(__clang__) && defined(__apple_build_version__)
#  define Vc_APPLECLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
#elif defined(__clang__)
#  define Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
#elif defined(__GNUC__)
#  define Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
#elif defined(_MSC_VER)
#  define Vc_MSVC _MSC_FULL_VER
#else
#  define Vc_UNSUPPORTED_COMPILER 1
#endif
103 
// With GCC 6 and later, save the diagnostic state and silence -Wignored-attributes for
// the code that follows. Vc_RESET_DIAGNOSTICS expands to the matching
// "GCC diagnostic pop"; for every other compiler it expands to nothing, so it can be
// used unconditionally.
#if defined Vc_GCC && Vc_GCC >= 0x60000
#  define Vc_RESET_DIAGNOSTICS _Pragma("GCC diagnostic pop")
#  pragma GCC diagnostic push
#  pragma GCC diagnostic ignored "-Wignored-attributes"
#else
#  define Vc_RESET_DIAGNOSTICS
#endif

#if defined Vc_ICC
// 'warning #2922: template parameter "<unnamed>" cannot be used because it follows a
// parameter pack and cannot be deduced from the parameters of function template'
// This warning is stupid. The parameter is unnamed because I don't want to use it. I see
// no other workaround than to disable the warning. Sadly, it doesn't suffice to disable
// it for the Vc headers. It must also be disabled at the places Vc types are used.
#  pragma warning disable 2922
#endif
120 
// Language standard detection.
//
// A translation unit that reports less than C++11 is rejected, unless the compiler is
// MSVC with _MSC_VER >= 1900.
// NOTE(review): the MSVC exemption presumably exists because MSVC reports an old
// __cplusplus value by default; confirm.
// Vc_CXX14 is defined for C++14 and later, Vc_CXX17 additionally for anything newer than
// 201700L.
#if __cplusplus < 201103 && (!defined Vc_MSVC || _MSC_VER < 1900)
#  error "Vc requires support for C++11."
#elif __cplusplus >= 201402L
#  define Vc_CXX14 1
#  if __cplusplus > 201700L
#    define Vc_CXX17 1
#  endif
#endif

// GNU-style inline assembly is assumed to be available whenever __GNUC__ is defined;
// define Vc_NO_INLINE_ASM to opt out.
#if defined(__GNUC__) && !defined(Vc_NO_INLINE_ASM)
#  define Vc_GNU_ASM 1
#endif
133 
// Detection of max_align_t.
//
// Defines Vc_HAVE_STD_MAX_ALIGN_T when std::max_align_t is usable, or
// Vc_HAVE_MAX_ALIGN_T when only the global ::max_align_t is (GCC older than 4.9).
// Neither macro is defined when max_align_t cannot be trusted.
#ifdef Vc_GCC
#  if Vc_GCC >= 0x70000 && defined __i386__
// GCC 7 changed alignof(max_align_t) to 16. glibc 2.26 followed with malloc in 2.26.
// 1. If GCC >= 7 and libc is not glibc max_align_t and malloc mismatch
// 2. If GCC >= 7 and libc is glibc < 2.26 max_align_t and malloc mismatch
//
// The glibc test is nested on purpose: __GLIBC_PREREQ is a function-like macro, and
// writing "!defined __GLIBC_PREREQ || !__GLIBC_PREREQ(2,26)" in a single #if does not
// parse when the macro is undefined (it becomes "0(2,26)"), which breaks the build on
// GCC with any non-glibc C library.
#    ifdef __GLIBC_PREREQ
#      if __GLIBC_PREREQ(2, 26)
#        define Vc_HAVE_STD_MAX_ALIGN_T 1
#      endif
#    endif
#  elif Vc_GCC >= 0x40900
#    define Vc_HAVE_STD_MAX_ALIGN_T 1
#  else
#    define Vc_HAVE_MAX_ALIGN_T 1
#  endif
#elif !defined(Vc_CLANG) && !defined(Vc_ICC)
// Clang/ICC don't provide max_align_t at all
// TODO: Clang defines max_align_t since 3.5.0. Whether std::max_align_t is defined depends on the
// standard library version.
#  define Vc_HAVE_STD_MAX_ALIGN_T 1
#endif
150 
// GCC and both Clang flavours get Vc_USE_BUILTIN_VECTOR_TYPES.
#if defined(Vc_GCC) || defined(Vc_CLANG) || defined Vc_APPLECLANG
#  define Vc_USE_BUILTIN_VECTOR_TYPES 1
#endif

// Calling-convention annotations. They expand to __cdecl / __vectorcall with MSVC and
// to nothing everywhere else, so declarations can carry them unconditionally.
#ifdef Vc_MSVC
#  define Vc_CDECL __cdecl
#  define Vc_VDECL __vectorcall
#else
#  define Vc_CDECL
#  define Vc_VDECL
#endif
162 
/* Define the following names as unique integers, which is the only type the preprocessor
 * can compare. This allows the use of -DVc_IMPL=SSE3: the preprocessor will then consider
 * Vc_IMPL and SSE3 to be equal. Of course, it is important to undefine these names again
 * later on!
 *
 * The instruction-set family occupies the bits selected by IMPL_MASK; the optional
 * extensions are single bits inside EXT_MASK, so that they can be OR'ed onto a family.
 */
#define Scalar 0x00100000
#define SSE    0x00200000
#define SSE2   0x00300000
#define SSE3   0x00400000
#define SSSE3  0x00500000
#define SSE4_1 0x00600000
#define SSE4_2 0x00700000
#define AVX    0x00800000
#define AVX2   0x00900000

#define XOP    0x00000001
#define FMA4   0x00000002
#define F16C   0x00000004
#define POPCNT 0x00000008
#define SSE4a  0x00000010
#define FMA    0x00000020
#define BMI2   0x00000040

#define IMPL_MASK 0xFFF00000
#define EXT_MASK  0x000FFFFF
187 
// With MSVC, define the GCC-style __SSE__ / __SSE2__ macros (unless already defined) from
// what MSVC reports, so that the detection logic below can test one spelling only.
#ifdef Vc_MSVC
#  ifdef _M_IX86_FP
#    if _M_IX86_FP >= 1
#      ifndef __SSE__
#        define __SSE__ 1
#      endif
#    endif
#    if _M_IX86_FP >= 2
#      ifndef __SSE2__
#        define __SSE2__ 1
#      endif
#    endif
#  elif defined(_M_AMD64)
// If the target is x86_64 then SSE2 is guaranteed
#    ifndef __SSE__
#      define __SSE__ 1
#    endif
#    ifndef __SSE2__
#      define __SSE2__ 1
#    endif
#  endif
#endif

// With ICC, define __POPCNT__ whenever SSE4.2 or SSE4a is enabled and the macro is not
// defined yet.
#if defined Vc_ICC && !defined __POPCNT__
#  if defined __SSE4_2__ || defined __SSE4A__
#    define __POPCNT__ 1
#  endif
#endif

// Catch code that still uses the pre-1.0 spelling of the selection macro.
#ifdef VC_IMPL
#error "You are using the old VC_IMPL macro. Since Vc 1.0 all Vc macros start with Vc_, i.e. a lower-case 'c'"
#endif
220 
// Selection of the Vc implementation.
//
// Result: a set of Vc_IMPL_<name> macros (each defined to 1) for the chosen
// instruction-set family and its extensions, Vc_USE_VEX_CODING when the compiler targets
// AVX, and exactly one Vc_DEFAULT_IMPL_<family> macro.
//
// Without Vc_IMPL the choice follows the compiler's target macros (__AVX2__, __SSE2__,
// ...). With Vc_IMPL (e.g. -DVc_IMPL=SSE3) the value is compared against the temporary
// integer macros Scalar, SSE, ..., AVX2 and the extension bits XOP, ..., BMI2, which this
// header defines earlier and which are undefined again at the end of this block.
#ifndef Vc_IMPL

#  if defined(__AVX2__)
#    define Vc_IMPL_AVX2 1
#    define Vc_IMPL_AVX 1
#  elif defined(__AVX__)
#    define Vc_IMPL_AVX 1
#  else
#    if defined(__SSE4_2__)
#      define Vc_IMPL_SSE 1
#      define Vc_IMPL_SSE4_2 1
#    endif
#    if defined(__SSE4_1__)
#      define Vc_IMPL_SSE 1
#      define Vc_IMPL_SSE4_1 1
#    endif
#    if defined(__SSE3__)
#      define Vc_IMPL_SSE 1
#      define Vc_IMPL_SSE3 1
#    endif
#    if defined(__SSSE3__)
#      define Vc_IMPL_SSE 1
#      define Vc_IMPL_SSSE3 1
#    endif
#    if defined(__SSE2__)
#      define Vc_IMPL_SSE 1
#      define Vc_IMPL_SSE2 1
#    endif

// No SSE level from SSE2 upwards is enabled: use the scalar implementation.
#    ifndef Vc_IMPL_SSE
#      define Vc_IMPL_Scalar 1
#    endif
#  endif
// Extensions are only picked up for the SIMD implementations.
#  if !defined(Vc_IMPL_Scalar)
#    ifdef __FMA4__
#      define Vc_IMPL_FMA4 1
#    endif
#    ifdef __XOP__
#      define Vc_IMPL_XOP 1
#    endif
#    ifdef __F16C__
#      define Vc_IMPL_F16C 1
#    endif
#    ifdef __POPCNT__
#      define Vc_IMPL_POPCNT 1
#    endif
#    ifdef __SSE4A__
#      define Vc_IMPL_SSE4a 1
#    endif
#    ifdef __FMA__
#      define Vc_IMPL_FMA 1
#    endif
#    ifdef __BMI2__
#      define Vc_IMPL_BMI2 1
#    endif
#  endif

#else // Vc_IMPL

#  if (Vc_IMPL & IMPL_MASK) == AVX2 // AVX2 supersedes SSE
#    define Vc_IMPL_AVX2 1
#    define Vc_IMPL_AVX 1
#  elif (Vc_IMPL & IMPL_MASK) == AVX // AVX supersedes SSE
#    define Vc_IMPL_AVX 1
#  elif (Vc_IMPL & IMPL_MASK) == Scalar
#    define Vc_IMPL_Scalar 1
#  elif (Vc_IMPL & IMPL_MASK) == SSE4_2
#    define Vc_IMPL_SSE4_2 1
#    define Vc_IMPL_SSE4_1 1
#    define Vc_IMPL_SSSE3 1
#    define Vc_IMPL_SSE3 1
#    define Vc_IMPL_SSE2 1
#    define Vc_IMPL_SSE 1
#  elif (Vc_IMPL & IMPL_MASK) == SSE4_1
#    define Vc_IMPL_SSE4_1 1
#    define Vc_IMPL_SSSE3 1
#    define Vc_IMPL_SSE3 1
#    define Vc_IMPL_SSE2 1
#    define Vc_IMPL_SSE 1
#  elif (Vc_IMPL & IMPL_MASK) == SSSE3
#    define Vc_IMPL_SSSE3 1
#    define Vc_IMPL_SSE3 1
#    define Vc_IMPL_SSE2 1
#    define Vc_IMPL_SSE 1
#  elif (Vc_IMPL & IMPL_MASK) == SSE3
#    define Vc_IMPL_SSE3 1
#    define Vc_IMPL_SSE2 1
#    define Vc_IMPL_SSE 1
#  elif (Vc_IMPL & IMPL_MASK) == SSE2
#    define Vc_IMPL_SSE2 1
#    define Vc_IMPL_SSE 1
#  elif (Vc_IMPL & IMPL_MASK) == SSE
// Plain "SSE": take whatever SSE levels the compiler has enabled.
#    define Vc_IMPL_SSE 1
#    if defined(__SSE4_2__)
#      define Vc_IMPL_SSE4_2 1
#    endif
#    if defined(__SSE4_1__)
#      define Vc_IMPL_SSE4_1 1
#    endif
#    if defined(__SSE3__)
#      define Vc_IMPL_SSE3 1
#    endif
#    if defined(__SSSE3__)
#      define Vc_IMPL_SSSE3 1
#    endif
#    if defined(__SSE2__)
#      define Vc_IMPL_SSE2 1
#    endif
#  elif (Vc_IMPL & IMPL_MASK) == 0 && (Vc_IMPL & SSE4a)
// this is for backward compatibility only where SSE4a was included in the main
// line of available SIMD instruction sets
#    define Vc_IMPL_SSE3 1
#    define Vc_IMPL_SSE2 1
#    define Vc_IMPL_SSE 1
#  endif
#  if (Vc_IMPL & XOP)
#    define Vc_IMPL_XOP 1
#  endif
#  if (Vc_IMPL & FMA4)
#    define Vc_IMPL_FMA4 1
#  endif
#  if (Vc_IMPL & F16C)
#    define Vc_IMPL_F16C 1
#  endif
// POPCNT is also enabled when the compiler reports it, unless the scalar
// implementation was requested.
#  if (!defined(Vc_IMPL_Scalar) && defined(__POPCNT__)) || (Vc_IMPL & POPCNT)
#    define Vc_IMPL_POPCNT 1
#  endif
#  if (Vc_IMPL & SSE4a)
#    define Vc_IMPL_SSE4a 1
#  endif
#  if (Vc_IMPL & FMA)
#    define Vc_IMPL_FMA 1
#  endif
#  if (Vc_IMPL & BMI2)
#    define Vc_IMPL_BMI2 1
#  endif
#  undef Vc_IMPL

#endif // Vc_IMPL

// If AVX is enabled in the compiler it will use VEX coding for the SIMD instructions.
#ifdef __AVX__
#  define Vc_USE_VEX_CODING 1
#endif

#ifdef Vc_IMPL_AVX
// if we have AVX then we also have all SSE intrinsics
#  define Vc_IMPL_SSE4_2 1
#  define Vc_IMPL_SSE4_1 1
#  define Vc_IMPL_SSSE3 1
#  define Vc_IMPL_SSE3 1
#  define Vc_IMPL_SSE2 1
#  define Vc_IMPL_SSE 1
#endif

// Clang 3.6.x workaround: drop AVX/AVX2 again. The SSE macros defined just above stay in
// place, which is what makes the SSE4 fallback work.
#if defined(Vc_CLANG) && Vc_CLANG >= 0x30600 && Vc_CLANG < 0x30700
#  if defined(Vc_IMPL_AVX)
#    warning "clang 3.6.x miscompiles AVX code, frequently losing 50% of the data. Vc will fall back to SSE4 instead."
#    undef Vc_IMPL_AVX
#    if defined(Vc_IMPL_AVX2)
#      undef Vc_IMPL_AVX2
#    endif
#  endif
#endif

// Sanity checks on the outcome of the selection.
#if !defined(Vc_IMPL_Scalar) && !defined(Vc_IMPL_SSE) && !defined(Vc_IMPL_AVX)
#  error "No suitable Vc implementation was selected! Probably Vc_IMPL was set to an invalid value."
#elif defined(Vc_IMPL_SSE) && !defined(Vc_IMPL_SSE2)
#  error "SSE requested but no SSE2 support. Vc needs at least SSE2!"
#endif

// Remove the temporary comparison constants again so that they do not leak into code
// that includes this header.
#undef Scalar
#undef SSE
#undef SSE2
#undef SSE3
#undef SSSE3
#undef SSE4_1
#undef SSE4_2
#undef AVX
#undef AVX2

#undef XOP
#undef FMA4
#undef F16C
#undef POPCNT
#undef SSE4a
#undef FMA
#undef BMI2

#undef IMPL_MASK
#undef EXT_MASK

// Exactly one Vc_DEFAULT_IMPL_* macro names the family that was selected.
#if defined Vc_IMPL_AVX2
#  define Vc_DEFAULT_IMPL_AVX2
#elif defined Vc_IMPL_AVX
#  define Vc_DEFAULT_IMPL_AVX
#elif defined Vc_IMPL_SSE
#  define Vc_DEFAULT_IMPL_SSE
#elif defined Vc_IMPL_Scalar
#  define Vc_DEFAULT_IMPL_Scalar
#else
#  error "Preprocessor logic broken. Please report a bug."
#endif
426 
427 #endif // DOXYGEN
428 
namespace Vc_VERSIONED_NAMESPACE
{

// Integer aliases local to this namespace, spelled out in terms of the fundamental types.
using int8_t = signed char;
using uint8_t = unsigned char;
using int16_t = signed short;
using uint16_t = unsigned short;
using int32_t = signed int;
using uint32_t = unsigned int;
using int64_t = signed long long;
using uint64_t = unsigned long long;

/**
 * Alignment choices for memory allocation.
 *
 * NOTE(review): the enumerators are listed in declaration order; no explicit values or
 * underlying type were visible, so the defaults are assumed - confirm.
 */
enum MallocAlignment {
    /// Align on boundary of vector sizes.
    AlignOnVector,
    /// Align on boundary of cache line sizes.
    AlignOnCacheline,
    /// Align on boundary of page sizes.
    AlignOnPage
};

/**
 * Identifies the instruction-set family a piece of code is built for.
 *
 * ImplementationT::is_between compares these values numerically, so the declaration
 * order is significant.
 * NOTE(review): the enumerators are assumed to carry implicit, consecutive values
 * starting at 0 - confirm.
 */
enum Implementation : std::uint_least32_t { // TODO: make enum class
    /// uses only fundamental types
    ScalarImpl,
    /// x86 SSE + SSE2
    SSE2Impl,
    /// x86 SSE + SSE2 + SSE3
    SSE3Impl,
    /// x86 SSE + SSE2 + SSE3 + SSSE3
    SSSE3Impl,
    /// x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1
    SSE41Impl,
    /// x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2
    SSE42Impl,
    /// x86 AVX
    AVXImpl,
    /// x86 AVX + AVX2
    AVX2Impl,
    /// Intel Xeon Phi
    MICImpl,
    /// Mask selecting the Implementation part of a combined feature value.
    ImplementationMask = 0xfff
};

/**
 * Optional instruction-set extensions. Every extension is a single bit above
 * ImplementationMask, so extensions can be added to an Implementation value.
 */
enum ExtraInstructions : std::uint_least32_t { // TODO: make enum class
    /// Support for float16 conversions in hardware
    Float16cInstructions = 0x01000,
    /// Support for FMA4 instructions
    Fma4Instructions = 0x02000,
    /// Support for XOP instructions
    XopInstructions = 0x04000,
    /// Support for the population count instruction
    PopcntInstructions = 0x08000,
    /// Support for SSE4a instructions
    Sse4aInstructions = 0x10000,
    /// Support for FMA instructions (3 operand variant)
    FmaInstructions = 0x20000,
    /// Support for ternary instruction coding (VEX)
    VexInstructions = 0x40000,
    /// Support for BMI2 instructions
    Bmi2Instructions = 0x80000,
    // PclmulqdqInstructions,
    // AesInstructions,
    // RdrandInstructions
    /// Mask selecting the ExtraInstructions part of a combined feature value.
    ExtraInstructionsMask = 0xfffff000u
};

/**
 * Compile-time description of an implementation.
 *
 * \tparam Features  One Implementation value combined with any number of
 *                   ExtraInstructions bits.
 */
template <unsigned int Features> struct ImplementationT {
    /// Returns the currently used Vc::Implementation.
    static constexpr Implementation current()
    {
        return static_cast<Implementation>(Features & ImplementationMask);
    }

    /// Returns whether \p impl is the current Vc::Implementation.
    static constexpr bool is(Implementation impl)
    {
        return static_cast<unsigned int>(impl) == current();
    }

    /**
     * Returns whether the current Vc::Implementation implements at least \p low and at
     * most \p high (both bounds inclusive).
     */
    static constexpr bool is_between(Implementation low, Implementation high)
    {
        return static_cast<unsigned int>(low) <= current() &&
               static_cast<unsigned int>(high) >= current();
    }

    /**
     * Returns whether the current code would run on a CPU providing
     * \p extraInstructions, i.e. whether every extension bit in Features is also set
     * in \p extraInstructions.
     */
    static constexpr bool runs_on(unsigned int extraInstructions)
    {
        return (extraInstructions & Features & ExtraInstructionsMask) ==
               (Features & ExtraInstructionsMask);
    }
};

/**
 * The implementation this translation unit is compiled for, assembled from the
 * Vc_IMPL_* macros.
 *
 * XOP is only added when SSE4a is enabled, and FMA4 only when XOP is enabled as well.
 */
using CurrentImplementation = ImplementationT<
#ifdef Vc_IMPL_Scalar
    ScalarImpl
#elif defined(Vc_IMPL_AVX2)
    AVX2Impl
#elif defined(Vc_IMPL_AVX)
    AVXImpl
#elif defined(Vc_IMPL_SSE4_2)
    SSE42Impl
#elif defined(Vc_IMPL_SSE4_1)
    SSE41Impl
#elif defined(Vc_IMPL_SSSE3)
    SSSE3Impl
#elif defined(Vc_IMPL_SSE3)
    SSE3Impl
#elif defined(Vc_IMPL_SSE2)
    SSE2Impl
#else
    // No implementation macro is visible (e.g. in a documentation-only pass): fall back
    // to the scalar implementation so that the template argument list is never empty.
    ScalarImpl
#endif
#ifdef Vc_IMPL_SSE4a
    + Sse4aInstructions
#ifdef Vc_IMPL_XOP
    + XopInstructions
#ifdef Vc_IMPL_FMA4
    + Fma4Instructions
#endif
#endif
#endif
#ifdef Vc_IMPL_POPCNT
    + PopcntInstructions
#endif
#ifdef Vc_IMPL_FMA
    + FmaInstructions
#endif
#ifdef Vc_IMPL_BMI2
    + Bmi2Instructions
#endif
#ifdef Vc_USE_VEX_CODING
    + VexInstructions
#endif
    >;

} // namespace Vc
618 
619 #include "version.h"
620 
621 #endif // VC_GLOBAL_H_
622 
623 // vim: foldmethod=marker
Vc::FmaInstructions
@ FmaInstructions
Support for FMA instructions (3 operand variant)
Definition: global.h:521
Vc::AlignOnCacheline
@ AlignOnCacheline
Align on boundary of cache line sizes (e.g. 64 bytes on x86).
Definition: global.h:459
Vc::Bmi2Instructions
@ Bmi2Instructions
Support for BMI2 instructions.
Definition: global.h:525
Vc::Sse4aInstructions
@ Sse4aInstructions
Support for SSE4a instructions.
Definition: global.h:519
Vc::SSE42Impl
@ SSE42Impl
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2
Definition: global.h:489
Vc::ImplementationT
Definition: global.h:541
Vc::Float16cInstructions
@ Float16cInstructions
Support for float16 conversions in hardware.
Definition: global.h:511
Vc::ImplementationT::current
static constexpr Implementation current()
Returns the currently used Vc::Implementation.
Definition: global.h:543
Vc::PopcntInstructions
@ PopcntInstructions
Support for the population count instruction.
Definition: global.h:517
Vc::AVX2Impl
@ AVX2Impl
x86 AVX + AVX2
Definition: global.h:493
Vc::SSSE3Impl
@ SSSE3Impl
x86 SSE + SSE2 + SSE3 + SSSE3
Definition: global.h:485
Vc::SSE3Impl
@ SSE3Impl
x86 SSE + SSE2 + SSE3
Definition: global.h:483
Vc::AVXImpl
@ AVXImpl
x86 AVX
Definition: global.h:491
Vc::MICImpl
@ MICImpl
Intel Xeon Phi.
Definition: global.h:495
Vc::AlignOnPage
@ AlignOnPage
Align on boundary of page sizes (e.g. 4096 bytes on x86).
Definition: global.h:465
Vc::ImplementationT::is_between
static constexpr bool is_between(Implementation low, Implementation high)
Returns whether the current Vc::Implementation implements at least low and at most high.
Definition: global.h:556
Vc::MallocAlignment
MallocAlignment
Definition: global.h:447
Vc::Implementation
Implementation
Definition: global.h:477
Vc::AlignOnVector
@ AlignOnVector
Align on boundary of vector sizes (e.g. 16 bytes with SSE).
Definition: global.h:453
Vc::SSE41Impl
@ SSE41Impl
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1
Definition: global.h:487
Vc::CurrentImplementation
ImplementationT< > CurrentImplementation
Definition: global.h:615
Vc::ExtraInstructions
ExtraInstructions
Definition: global.h:509
Vc::SSE2Impl
@ SSE2Impl
x86 SSE + SSE2
Definition: global.h:481
Vc::ImplementationT::is
static constexpr bool is(Implementation impl)
Returns whether impl is the current Vc::Implementation.
Definition: global.h:548
Vc::XopInstructions
@ XopInstructions
Support for XOP instructions.
Definition: global.h:515
Vc::Fma4Instructions
@ Fma4Instructions
Support for FMA4 instructions.
Definition: global.h:513
Vc::VexInstructions
@ VexInstructions
Support for ternary instruction coding (VEX)
Definition: global.h:523
Vc::ScalarImpl
@ ScalarImpl
uses only fundamental types
Definition: global.h:479
Vc::ImplementationT::runs_on
static constexpr bool runs_on(unsigned int extraInstructions)
Returns whether the current code would run on a CPU providing extraInstructions.
Definition: global.h:564