Vc  1.4.1-dev
SIMD Vector Classes for C++
global.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 #ifndef VC_GLOBAL_H_
29 #define VC_GLOBAL_H_
30 
31 #include <cstdint>
32 #include "fwddecl.h"
33 
34 #ifdef DOXYGEN
35 
// Compiler identification macros. This branch is only seen when DOXYGEN is defined, so
// the definitions below exist for documentation purposes. Every macro except Vc_GCC is
// #undef'ed again right after its definition.

// Vc_ICC: identifies the Intel compiler through its build date.
48 #define Vc_ICC __INTEL_COMPILER_BUILD_DATE
49 #undef Vc_ICC
50 
// Vc_CLANG: clang version encoded as major * 0x10000 + minor * 0x100 + patchlevel.
57 #define Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
58 #undef Vc_CLANG
59 
// Vc_APPLECLANG: same encoding as Vc_CLANG, used for Apple's clang builds.
66 #define Vc_APPLECLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
67 #undef Vc_APPLECLANG
68 
// Vc_GCC: GCC version encoded as major * 0x10000 + minor * 0x100 + patchlevel.
// NOTE(review): unlike the other macros, Vc_GCC is not #undef'ed here and therefore stays
// defined for the remainder of a DOXYGEN run -- confirm that this is intended.
75 #define Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
76 
// Vc_MSVC: identifies the Microsoft compiler through _MSC_FULL_VER.
83 #define Vc_MSVC _MSC_FULL_VER
84 #undef Vc_MSVC
85 
86 
87 #else // DOXYGEN
88 
89 // Compiler defines
// At most one of Vc_ICC, Vc_APPLECLANG, Vc_CLANG, Vc_GCC and Vc_MSVC is defined below.
// The tests form a single #elif chain, so their order decides the result: a compiler
// that satisfies more than one test is classified by the first test that matches.
// The clang and GCC variants encode the version as
// major * 0x10000 + minor * 0x100 + patchlevel, which allows numeric comparisons such as
// "Vc_GCC >= 0x60000".
90 #ifdef __INTEL_COMPILER
91 #define Vc_ICC __INTEL_COMPILER_BUILD_DATE
// Apple's clang is told apart from upstream clang by __apple_build_version__.
92 #elif defined(__clang__) && defined(__apple_build_version__)
93 #define Vc_APPLECLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
94 #elif defined(__clang__)
95 #define Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
96 #elif defined(__GNUC__)
97 #define Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
98 #elif defined(_MSC_VER)
99 #define Vc_MSVC _MSC_FULL_VER
100 #else
// None of the known compilers was recognised; only a marker macro is set here.
101 #define Vc_UNSUPPORTED_COMPILER 1
102 #endif
103 
// GCC 6 and later: save the current diagnostic state and silence -Wignored-attributes.
// Vc_RESET_DIAGNOSTICS expands to the matching "GCC diagnostic pop" so the saved state
// can be restored later; for every other compiler it expands to nothing.
104 #if defined Vc_GCC && Vc_GCC >= 0x60000
105 #define Vc_RESET_DIAGNOSTICS _Pragma("GCC diagnostic pop")
106 #pragma GCC diagnostic push
107 #pragma GCC diagnostic ignored "-Wignored-attributes"
108 #else
109 #define Vc_RESET_DIAGNOSTICS
110 #endif
111 
112 #if defined Vc_ICC
113 // 'warning #2922: template parameter "<unnamed>" cannot be used because it follows a
114 // parameter pack and cannot be deduced from the parameters of function template'
115 // This warning is stupid. The parameter is unnamed because I don't want to use it. I see
116 // no other workaround than to disable the warning. Sadly, it doesn't suffice to disable
117 // it for the Vc headers. It must also be disabled at the places Vc types are used.
// Consequently the warning is switched off here without being re-enabled afterwards.
118 #pragma warning disable 2922
119 #endif
120 
// Language standard check.
// A __cplusplus value below 201103 is a hard error, except for MSVC with _MSC_VER >= 1900,
// which is let through regardless of the __cplusplus value it reports.
// Vc_CXX14 is defined when __cplusplus is at least 201402L, and Vc_CXX17 additionally
// when it is greater than 201700L.
121 #if __cplusplus < 201103 && (!defined Vc_MSVC || _MSC_VER < 1900)
122 # error "Vc requires support for C++11."
123 #elif __cplusplus >= 201402L
124 # define Vc_CXX14 1
125 # if __cplusplus > 201700L
126 # define Vc_CXX17 1
127 # endif
128 #endif
129 
// Vc_GNU_ASM: set for every compiler that defines __GNUC__, unless the user opted out by
// defining Vc_NO_INLINE_ASM.
130 #if defined(__GNUC__) && !defined(Vc_NO_INLINE_ASM)
131 #define Vc_GNU_ASM 1
132 #endif
133 
// max_align_t availability: GCC sets Vc_HAVE_MAX_ALIGN_T; any compiler that is neither
// Vc_CLANG nor Vc_ICC sets Vc_HAVE_STD_MAX_ALIGN_T instead.
// NOTE(review): Vc_APPLECLANG is not excluded by the #elif below, so Apple's clang ends up
// with Vc_HAVE_STD_MAX_ALIGN_T although the comment says clang provides no max_align_t --
// confirm that this is intended.
134 #ifdef Vc_GCC
135 # define Vc_HAVE_MAX_ALIGN_T 1
136 #elif !defined(Vc_CLANG) && !defined(Vc_ICC)
137 // Clang/ICC don't provide max_align_t at all
138 # define Vc_HAVE_STD_MAX_ALIGN_T 1
139 #endif
140 
// Vc_USE_BUILTIN_VECTOR_TYPES: set for GCC, clang and Apple's clang.
141 #if defined(Vc_GCC) || defined(Vc_CLANG) || defined Vc_APPLECLANG
142 #define Vc_USE_BUILTIN_VECTOR_TYPES 1
143 #endif
144 
// Calling-convention keywords: __cdecl / __vectorcall on MSVC, empty everywhere else so
// that the macros can be used unconditionally in declarations.
145 #ifdef Vc_MSVC
146 # define Vc_CDECL __cdecl
147 # define Vc_VDECL __vectorcall
148 #else
149 # define Vc_CDECL
150 # define Vc_VDECL
151 #endif
152 
153 /* Define the following strings to a unique integer, which is the only type the preprocessor can
154  * compare. This allows using -DVc_IMPL=SSE3. The preprocessor will then consider Vc_IMPL and SSE3
155  * to be equal. Of course, it is important to undefine the strings later on!
156  */
// Base implementations: exactly one of these values occupies the bits covered by
// IMPL_MASK.
157 #define Scalar 0x00100000
158 #define SSE 0x00200000
159 #define SSE2 0x00300000
160 #define SSE3 0x00400000
161 #define SSSE3 0x00500000
162 #define SSE4_1 0x00600000
163 #define SSE4_2 0x00700000
164 #define AVX 0x00800000
165 #define AVX2 0x00900000
166 
// Optional extensions: one bit each inside EXT_MASK, so several of them can be combined
// with a base implementation in a single Vc_IMPL value.
167 #define XOP 0x00000001
168 #define FMA4 0x00000002
169 #define F16C 0x00000004
170 #define POPCNT 0x00000008
171 #define SSE4a 0x00000010
172 #define FMA 0x00000020
173 #define BMI2 0x00000040
174 
// Masks separating the base implementation bits from the extension bits.
175 #define IMPL_MASK 0xFFF00000
176 #define EXT_MASK 0x000FFFFF
177 
// MSVC: provide the __SSE__ / __SSE2__ macros that the detection logic further down tests.
// They are derived from _M_IX86_FP (>= 1 gives __SSE__, >= 2 additionally __SSE2__) or,
// when _M_IX86_FP is not defined, from _M_AMD64. Macros that already exist are left alone.
178 #ifdef Vc_MSVC
179 # ifdef _M_IX86_FP
180 # if _M_IX86_FP >= 1
181 # ifndef __SSE__
182 # define __SSE__ 1
183 # endif
184 # endif
185 # if _M_IX86_FP >= 2
186 # ifndef __SSE2__
187 # define __SSE2__ 1
188 # endif
189 # endif
190 # elif defined(_M_AMD64)
191 // If the target is x86_64 then SSE2 is guaranteed
192 # ifndef __SSE__
193 # define __SSE__ 1
194 # endif
195 # ifndef __SSE2__
196 # define __SSE2__ 1
197 # endif
198 # endif
199 #endif
200 
// ICC: define __POPCNT__ when it is missing but __SSE4_2__ or __SSE4A__ is defined.
201 #if defined Vc_ICC && !defined __POPCNT__
202 # if defined __SSE4_2__ || defined __SSE4A__
203 # define __POPCNT__ 1
204 # endif
205 #endif
206 
// Selection of the Vc_IMPL_* macros.
// Reject the pre-1.0 spelling of the configuration macro outright.
207 #ifdef VC_IMPL
208 #error "You are using the old VC_IMPL macro. Since Vc 1.0 all Vc macros start with Vc_, i.e. a lower-case 'c'"
209 #endif
210 
// Case 1: the user did not define Vc_IMPL. The implementation is derived from the
// instruction-set macros predefined by the compiler.
211 #ifndef Vc_IMPL
212 
// AVX2 implies AVX. When either is present, the individual SSE levels are not examined
// in this block.
213 # if defined(__AVX2__)
214 # define Vc_IMPL_AVX2 1
215 # define Vc_IMPL_AVX 1
216 # elif defined(__AVX__)
217 # define Vc_IMPL_AVX 1
218 # else
// Without AVX every SSE level is tested on its own; each hit defines its own macro plus
// the generic Vc_IMPL_SSE.
219 # if defined(__SSE4_2__)
220 # define Vc_IMPL_SSE 1
221 # define Vc_IMPL_SSE4_2 1
222 # endif
223 # if defined(__SSE4_1__)
224 # define Vc_IMPL_SSE 1
225 # define Vc_IMPL_SSE4_1 1
226 # endif
227 # if defined(__SSE3__)
228 # define Vc_IMPL_SSE 1
229 # define Vc_IMPL_SSE3 1
230 # endif
231 # if defined(__SSSE3__)
232 # define Vc_IMPL_SSE 1
233 # define Vc_IMPL_SSSE3 1
234 # endif
235 # if defined(__SSE2__)
236 # define Vc_IMPL_SSE 1
237 # define Vc_IMPL_SSE2 1
238 # endif
239 
// No SSE level was found: fall back to the scalar implementation.
240 # if defined(Vc_IMPL_SSE)
241  // nothing
242 # else
243 # define Vc_IMPL_Scalar 1
244 # endif
245 # endif
// Extension macros are only picked up from the compiler when a SIMD implementation
// (i.e. not Scalar) was selected above.
246 # if !defined(Vc_IMPL_Scalar)
247 # ifdef __FMA4__
248 # define Vc_IMPL_FMA4 1
249 # endif
250 # ifdef __XOP__
251 # define Vc_IMPL_XOP 1
252 # endif
253 # ifdef __F16C__
254 # define Vc_IMPL_F16C 1
255 # endif
256 # ifdef __POPCNT__
257 # define Vc_IMPL_POPCNT 1
258 # endif
259 # ifdef __SSE4A__
260 # define Vc_IMPL_SSE4a 1
261 # endif
262 # ifdef __FMA__
263 # define Vc_IMPL_FMA 1
264 # endif
265 # ifdef __BMI2__
266 # define Vc_IMPL_BMI2 1
267 # endif
268 # endif
269 
// Case 2: the user defined Vc_IMPL. The bits under IMPL_MASK select the base
// implementation; an explicitly requested SSE level also enables all lower SSE levels.
270 #else // Vc_IMPL
271 
272 # if (Vc_IMPL & IMPL_MASK) == AVX2 // AVX2 supersedes SSE
273 # define Vc_IMPL_AVX2 1
274 # define Vc_IMPL_AVX 1
275 # elif (Vc_IMPL & IMPL_MASK) == AVX // AVX supersedes SSE
276 # define Vc_IMPL_AVX 1
277 # elif (Vc_IMPL & IMPL_MASK) == Scalar
278 # define Vc_IMPL_Scalar 1
279 # elif (Vc_IMPL & IMPL_MASK) == SSE4_2
280 # define Vc_IMPL_SSE4_2 1
281 # define Vc_IMPL_SSE4_1 1
282 # define Vc_IMPL_SSSE3 1
283 # define Vc_IMPL_SSE3 1
284 # define Vc_IMPL_SSE2 1
285 # define Vc_IMPL_SSE 1
286 # elif (Vc_IMPL & IMPL_MASK) == SSE4_1
287 # define Vc_IMPL_SSE4_1 1
288 # define Vc_IMPL_SSSE3 1
289 # define Vc_IMPL_SSE3 1
290 # define Vc_IMPL_SSE2 1
291 # define Vc_IMPL_SSE 1
292 # elif (Vc_IMPL & IMPL_MASK) == SSSE3
293 # define Vc_IMPL_SSSE3 1
294 # define Vc_IMPL_SSE3 1
295 # define Vc_IMPL_SSE2 1
296 # define Vc_IMPL_SSE 1
297 # elif (Vc_IMPL & IMPL_MASK) == SSE3
298 # define Vc_IMPL_SSE3 1
299 # define Vc_IMPL_SSE2 1
300 # define Vc_IMPL_SSE 1
301 # elif (Vc_IMPL & IMPL_MASK) == SSE2
302 # define Vc_IMPL_SSE2 1
303 # define Vc_IMPL_SSE 1
// A plain "SSE" request leaves the exact level open: it is taken from the compiler's
// predefined macros instead.
304 # elif (Vc_IMPL & IMPL_MASK) == SSE
305 # define Vc_IMPL_SSE 1
306 # if defined(__SSE4_2__)
307 # define Vc_IMPL_SSE4_2 1
308 # endif
309 # if defined(__SSE4_1__)
310 # define Vc_IMPL_SSE4_1 1
311 # endif
312 # if defined(__SSE3__)
313 # define Vc_IMPL_SSE3 1
314 # endif
315 # if defined(__SSSE3__)
316 # define Vc_IMPL_SSSE3 1
317 # endif
318 # if defined(__SSE2__)
319 # define Vc_IMPL_SSE2 1
320 # endif
321 # elif (Vc_IMPL & IMPL_MASK) == 0 && (Vc_IMPL & SSE4a)
322  // this is for backward compatibility only where SSE4a was included in the main
323  // line of available SIMD instruction sets
324 # define Vc_IMPL_SSE3 1
325 # define Vc_IMPL_SSE2 1
326 # define Vc_IMPL_SSE 1
327 # endif
// Extension flags are independent bits of Vc_IMPL and are tested one by one.
328 # if (Vc_IMPL & XOP)
329 # define Vc_IMPL_XOP 1
330 # endif
331 # if (Vc_IMPL & FMA4)
332 # define Vc_IMPL_FMA4 1
333 # endif
334 # if (Vc_IMPL & F16C)
335 # define Vc_IMPL_F16C 1
336 # endif
// POPCNT is the one extension that is also enabled without being requested, namely when
// the compiler defines __POPCNT__ and the scalar implementation was not selected.
337 # if (!defined(Vc_IMPL_Scalar) && defined(__POPCNT__)) || (Vc_IMPL & POPCNT)
338 # define Vc_IMPL_POPCNT 1
339 # endif
340 # if (Vc_IMPL & SSE4a)
341 # define Vc_IMPL_SSE4a 1
342 # endif
343 # if (Vc_IMPL & FMA)
344 # define Vc_IMPL_FMA 1
345 # endif
346 # if (Vc_IMPL & BMI2)
347 # define Vc_IMPL_BMI2 1
348 # endif
// The request has been translated into Vc_IMPL_* macros; Vc_IMPL itself is removed.
349 # undef Vc_IMPL
350 
351 #endif // Vc_IMPL
352 
353 // If AVX is enabled in the compiler it will use VEX coding for the SIMD instructions.
// Note that this test looks at the compiler's __AVX__ macro, not at Vc_IMPL_AVX.
354 #ifdef __AVX__
355 # define Vc_USE_VEX_CODING 1
356 #endif
357 
358 #ifdef Vc_IMPL_AVX
359 // if we have AVX then we also have all SSE intrinsics
360 # define Vc_IMPL_SSE4_2 1
361 # define Vc_IMPL_SSE4_1 1
362 # define Vc_IMPL_SSSE3 1
363 # define Vc_IMPL_SSE3 1
364 # define Vc_IMPL_SSE2 1
365 # define Vc_IMPL_SSE 1
366 #endif
367 
// Workaround for clang 3.6.x (0x30600 <= Vc_CLANG < 0x30700): drop the AVX/AVX2
// selection. This block must stay after the one above, because the Vc_IMPL_SSE* macros
// defined there are what remains as the fallback once Vc_IMPL_AVX is removed.
368 #if defined(Vc_CLANG) && Vc_CLANG >= 0x30600 && Vc_CLANG < 0x30700
369 # if defined(Vc_IMPL_AVX)
370 # warning "clang 3.6.x miscompiles AVX code, frequently losing 50% of the data. Vc will fall back to SSE4 instead."
371 # undef Vc_IMPL_AVX
372 # if defined(Vc_IMPL_AVX2)
373 # undef Vc_IMPL_AVX2
374 # endif
375 # endif
376 #endif
377 
// Validation: some implementation must have been selected, and an SSE selection must
// include at least SSE2.
378 # if !defined(Vc_IMPL_Scalar) && !defined(Vc_IMPL_SSE) && !defined(Vc_IMPL_AVX)
379 # error "No suitable Vc implementation was selected! Probably Vc_IMPL was set to an invalid value."
380 # elif defined(Vc_IMPL_SSE) && !defined(Vc_IMPL_SSE2)
381 # error "SSE requested but no SSE2 support. Vc needs at least SSE2!"
382 # endif
383 
// Remove the temporary short names that were defined only so that the preprocessor could
// compare Vc_IMPL against them. They must not remain defined past this point.
// Base implementation names:
384 #undef Scalar
385 #undef SSE
386 #undef SSE2
387 #undef SSE3
388 #undef SSSE3
389 #undef SSE4_1
390 #undef SSE4_2
391 #undef AVX
392 #undef AVX2
393 
// Extension flag names:
394 #undef XOP
395 #undef FMA4
396 #undef F16C
397 #undef POPCNT
398 #undef SSE4a
399 #undef FMA
400 #undef BMI2
401 
// Masks:
402 #undef IMPL_MASK
403 #undef EXT_MASK
404 
// Define exactly one Vc_DEFAULT_IMPL_* macro. The tests run from AVX2 down to Scalar, so
// the first match wins when several Vc_IMPL_* macros are set (Vc_IMPL_AVX, for instance,
// is always accompanied by Vc_IMPL_SSE).
405 #if defined Vc_IMPL_AVX2
406 #define Vc_DEFAULT_IMPL_AVX2
407 #elif defined Vc_IMPL_AVX
408 #define Vc_DEFAULT_IMPL_AVX
409 #elif defined Vc_IMPL_SSE
410 #define Vc_DEFAULT_IMPL_SSE
411 #elif defined Vc_IMPL_Scalar
412 #define Vc_DEFAULT_IMPL_Scalar
413 #else
// Not reachable as long as the validation earlier in this file guarantees that one of
// Scalar / SSE / AVX has been selected.
414 #error "Preprocessor logic broken. Please report a bug."
415 #endif
416 
417 #endif // DOXYGEN
418 
419 namespace Vc_VERSIONED_NAMESPACE
420 {
421 
// Fixed-width integer type names, declared inside the Vc namespace and spelled with
// fundamental types rather than forwarded from <cstdint>.
// NOTE(review): the widths are assumed, not checked (e.g. int is taken to be 32 bits and
// long long 64 bits) -- confirm for all supported targets.
422 typedef signed char int8_t;
423 typedef unsigned char uint8_t;
424 typedef signed short int16_t;
425 typedef unsigned short uint16_t;
426 typedef signed int int32_t;
427 typedef unsigned int uint32_t;
428 typedef signed long long int64_t;
429 typedef unsigned long long uint64_t;
430 
456 };
457 
// Enum to identify a certain SIMD instruction set. The underlying type is fixed to
// std::uint_least32_t.
467 enum Implementation : std::uint_least32_t { // TODO: make enum class
// Mask selecting the bits of a feature word that hold the Implementation value; applied
// in ImplementationT::current().
486  ImplementationMask = 0xfff
487 };
488 
// Flags for additional instructions. The list of available instructions is not easily
// described by a linear list of instruction sets, hence these are separate bits that can
// be combined. The underlying type is fixed to std::uint_least32_t.
499 enum ExtraInstructions : std::uint_least32_t { // TODO: make enum class
// Support for FMA4 instructions.
503  Fma4Instructions = 0x02000,
// Support for XOP instructions.
505  XopInstructions = 0x04000,
// Support for SSE4a instructions.
509  Sse4aInstructions = 0x10000,
// Support for FMA instructions (3 operand variant).
511  FmaInstructions = 0x20000,
// Support for ternary instruction coding (VEX).
513  VexInstructions = 0x40000,
// Support for BMI2 instructions.
515  Bmi2Instructions = 0x80000,
516  // PclmulqdqInstructions,
517  // AesInstructions,
518  // RdrandInstructions
// Mask selecting the bits of a feature word that hold ExtraInstructions flags; applied in
// ImplementationT::runs_on().
519  ExtraInstructionsMask = 0xfffff000u
520 };
521 
531 template <unsigned int Features> struct ImplementationT {
533  static constexpr Implementation current()
534  {
535  return static_cast<Implementation>(Features & ImplementationMask);
536  }
538  static constexpr bool is(Implementation impl)
539  {
540  return static_cast<unsigned int>(impl) == current();
541  }
546  static constexpr bool is_between(Implementation low, Implementation high)
547  {
548  return static_cast<unsigned int>(low) <= current() &&
549  static_cast<unsigned int>(high) >= current();
550  }
554  static constexpr bool runs_on(unsigned int extraInstructions)
555  {
556  return (extraInstructions & Features & ExtraInstructionsMask) ==
557  (Features & ExtraInstructionsMask);
558  }
559 };
567 #ifdef Vc_IMPL_Scalar
568  ScalarImpl
569 #elif defined(Vc_IMPL_AVX2)
570  AVX2Impl
571 #elif defined(Vc_IMPL_AVX)
572  AVXImpl
573 #elif defined(Vc_IMPL_SSE4_2)
574  SSE42Impl
575 #elif defined(Vc_IMPL_SSE4_1)
576  SSE41Impl
577 #elif defined(Vc_IMPL_SSSE3)
578  SSSE3Impl
579 #elif defined(Vc_IMPL_SSE3)
580  SSE3Impl
581 #elif defined(Vc_IMPL_SSE2)
582  SSE2Impl
583 #endif
584 #ifdef Vc_IMPL_SSE4a
586 #ifdef Vc_IMPL_XOP
588 #ifdef Vc_IMPL_FMA4
590 #endif
591 #endif
592 #endif
593 #ifdef Vc_IMPL_POPCNT
595 #endif
596 #ifdef Vc_IMPL_FMA
598 #endif
599 #ifdef Vc_IMPL_BMI2
601 #endif
602 #ifdef Vc_USE_VEX_CODING
604 #endif
605  >;
606 
607 } // namespace Vc
608 
609 #include "version.h"
610 
611 #endif // VC_GLOBAL_H_
612 
613 // vim: foldmethod=marker
Intel Xeon Phi.
Definition: global.h:485
ExtraInstructions
The list of available instructions is not easily described by a linear list of instruction sets...
Definition: global.h:499
Align on boundary of page sizes (e.g. 4096 bytes on x86).
Definition: global.h:455
static constexpr bool is_between(Implementation low, Implementation high)
Returns whether the current Vc::Implementation implements at least low and at most high...
Definition: global.h:546
Support for FMA instructions (3 operand variant)
Definition: global.h:511
Implementation
Enum to identify a certain SIMD instruction set.
Definition: global.h:467
static constexpr bool runs_on(unsigned int extraInstructions)
Returns whether the current code would run on a CPU providing extraInstructions.
Definition: global.h:554
This class identifies the specific implementation Vc uses in the current translation unit in terms of...
Definition: global.h:531
Support for BMI2 instructions.
Definition: global.h:515
Support for XOP instructions.
Definition: global.h:505
MallocAlignment
Enum that specifies the alignment and padding restrictions to use for memory allocation with Vc::malloc.
Definition: global.h:437
Support for the population count instruction.
Definition: global.h:507
Support for SSE4a instructions.
Definition: global.h:509
static constexpr bool is(Implementation impl)
Returns whether impl is the current Vc::Implementation.
Definition: global.h:538
x86 SSE + SSE2
Definition: global.h:471
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1 + SSE4.2
Definition: global.h:479
Align on boundary of cache line sizes (e.g. 64 bytes on x86).
Definition: global.h:449
Support for ternary instruction coding (VEX)
Definition: global.h:513
x86 SSE + SSE2 + SSE3 + SSSE3
Definition: global.h:475
x86 SSE + SSE2 + SSE3
Definition: global.h:473
Support for FMA4 instructions.
Definition: global.h:503
static constexpr Implementation current()
Returns the currently used Vc::Implementation.
Definition: global.h:533
uses only fundamental types
Definition: global.h:469
Align on boundary of vector sizes (e.g. 16 bytes on SSE platforms).
Definition: global.h:443
x86 AVX + AVX2
Definition: global.h:483
Support for float16 conversions in hardware.
Definition: global.h:501
x86 AVX
Definition: global.h:481
x86 SSE + SSE2 + SSE3 + SSSE3 + SSE4.1
Definition: global.h:477