Additional classes, macros, and functions that help to work more easily with the main vector types.
|
using | CurrentImplementation = ImplementationT< > |
| Identifies the Vc implementation used in the current translation unit. More...
|
|
using | VectorAlignedBase = AlignedBase< Detail::max(alignof(Vector< float >), alignof(Vector< double >), alignof(Vector< ullong >), alignof(Vector< llong >), alignof(Vector< ulong >), alignof(Vector< long >), alignof(Vector< uint >), alignof(Vector< int >), alignof(Vector< ushort >), alignof(Vector< short >), alignof(Vector< uchar >), alignof(Vector< schar >))> |
| Helper type to ensure suitable alignment for any Vc::Vector<T> type (using the default VectorAbi). More...
|
|
template<typename V > |
using | VectorAlignedBaseT = AlignedBase< alignof(V)> |
| Variant of the above type ensuring suitable alignment only for the specified vector type V . More...
|
|
using | MemoryAlignedBase = AlignedBase< Detail::max(Vector< float >::MemoryAlignment, Vector< double >::MemoryAlignment, Vector< ullong >::MemoryAlignment, Vector< llong >::MemoryAlignment, Vector< ulong >::MemoryAlignment, Vector< long >::MemoryAlignment, Vector< uint >::MemoryAlignment, Vector< int >::MemoryAlignment, Vector< ushort >::MemoryAlignment, Vector< short >::MemoryAlignment, Vector< uchar >::MemoryAlignment, Vector< schar >::MemoryAlignment)> |
| Helper class to ensure suitable alignment for arrays of scalar objects for any Vc::Vector<T> type (using the default VectorAbi). More...
|
|
template<typename V > |
using | MemoryAlignedBaseT = AlignedBase< V::MemoryAlignment > |
| Variant of the above type ensuring suitable alignment only for the specified vector type V . More...
|
|
using | llong = long long |
| long long shorthand
|
|
using | ullong = unsigned long long |
| unsigned long long shorthand
|
|
using | ulong = unsigned long |
| unsigned long shorthand
|
|
using | uint = unsigned int |
| unsigned int shorthand
|
|
using | ushort = unsigned short |
| unsigned short shorthand
|
|
using | uchar = unsigned char |
| unsigned char shorthand
|
|
using | schar = signed char |
| signed char shorthand
|
|
|
enum | MallocAlignment { AlignOnVector
, AlignOnCacheline
, AlignOnPage
} |
| Enum that specifies the alignment and padding restrictions to use for memory allocation with Vc::malloc. More...
|
|
enum | Implementation : std::uint_least32_t {
ScalarImpl
, SSE2Impl
, SSE3Impl
, SSSE3Impl
,
SSE41Impl
, SSE42Impl
, AVXImpl
, AVX2Impl
,
MICImpl
, ImplementationMask = 0xfff
} |
| Enum to identify a certain SIMD instruction set. More...
|
|
enum | ExtraInstructions : std::uint_least32_t {
Float16cInstructions = 0x01000
, Fma4Instructions = 0x02000
, XopInstructions = 0x04000
, PopcntInstructions = 0x08000
,
Sse4aInstructions = 0x10000
, FmaInstructions = 0x20000
, VexInstructions = 0x40000
, Bmi2Instructions = 0x80000
,
ExtraInstructionsMask = 0xfffff000u
} |
| The list of available instructions is not easily described by a linear list of instruction sets. More...
|
|
|
const char * | versionString () |
|
constexpr unsigned int | versionNumber () |
|
template<typename V , typename Parent , typename Dimension , typename RM > |
std::ostream & | operator<< (std::ostream &s, const Vc::MemoryBase< V, Parent, Dimension, RM > &m) |
| Prints the contents of a Memory object into a stream object. More...
|
|
template<class InputIt , class UnaryFunction > |
UnaryFunction | simd_for_each (InputIt first, InputIt last, UnaryFunction f) |
| Vc variant of the std::for_each algorithm. More...
|
|
template<typename Mask , typename T > |
enable_if< is_simd_mask< Mask >::value &&is_simd_vector< T >::value, T > | iif (const Mask &condition, const T &trueValue, const T &falseValue) |
| Function to mimic the ternary operator '?:' (inline-if). More...
|
|
template<typename T > |
constexpr T | iif (bool condition, const T &trueValue, const T &falseValue) |
| Overload of the above for boolean conditions. More...
|
|
template<typename V , typename = enable_if<Traits::is_simd_vector<V>::value>> |
std::pair< V, V > | interleave (const V &a, const V &b) |
| Interleaves the entries from a and b into two vectors of the same type. More...
|
|
template<typename T , Vc::MallocAlignment A> |
T * | malloc (size_t n) |
| Allocates memory on the Heap with alignment and padding suitable for vectorized access. More...
|
|
template<typename T > |
void | free (T *p) |
| Frees memory that was allocated with Vc::malloc. More...
|
|
void | prefetchForOneRead (const void *addr) |
| Prefetch the cacheline containing addr for a single read access. More...
|
|
void | prefetchForModify (const void *addr) |
| Prefetch the cacheline containing addr for modification. More...
|
|
void | prefetchClose (const void *addr) |
| Prefetch the cacheline containing addr to L1 cache. More...
|
|
void | prefetchMid (const void *addr) |
| Prefetch the cacheline containing addr to L2 cache. More...
|
|
void | prefetchFar (const void *addr) |
| Prefetch the cacheline containing addr to L3 cache. More...
|
|
template<typename V , typename T , typename Abi > |
enable_if<(V::size()==Vector< T, Abi >::size() &&sizeof(typename V::VectorEntryType)==sizeof(typename Vector< T, Abi >::VectorEntryType) &&sizeof(V)==sizeof(Vector< T, Abi >) &&alignof(V)<=alignof(Vector< T, Abi >)), V > | reinterpret_components_cast (const Vector< T, Abi > &x) |
| Constructs a new Vector object of type V from the Vector x , reinterpreting the bits of x for the new type V . More...
|
|
template<typename M > |
constexpr WhereImpl::WhereMask< M > | where (const M &mask) |
| Conditional assignment. More...
|
|
|
constexpr AlignedTag | Aligned |
| Use this object for a flags parameter to request aligned loads and stores. More...
|
|
constexpr UnalignedTag | Unaligned |
| Use this object for a flags parameter to request unaligned loads and stores. More...
|
|
constexpr StreamingTag | Streaming |
| Use this object for a flags parameter to request streaming loads and stores. More...
|
|
constexpr LoadStoreFlags::LoadStoreFlags< PrefetchFlag<> > | PrefetchDefault |
| Use this object for a flags parameter to request default software prefetches to be emitted.
|
|
constexpr VectorSpecialInitializerZero | Zero = {} |
| The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/false .
|
|
constexpr VectorSpecialInitializerOne | One = {} |
| The special object Vc::One can be used to construct Vector and Mask objects initialized to one/true .
|
|
constexpr VectorSpecialInitializerIndexesFromZero | IndexesFromZero = {} |
| The special object Vc::IndexesFromZero can be used to construct Vector objects initialized to values 0, 1, 2, 3, 4, ...
|
|
|
#define | Vc_ICC __INTEL_COMPILER_BUILD_DATE |
| This macro is defined to a number identifying the ICC version if the current translation unit is compiled with the Intel compiler. More...
|
|
#define | Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__) |
| This macro is defined to a number identifying the Clang version if the current translation unit is compiled with the Clang compiler. More...
|
|
#define | Vc_APPLECLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__) |
| This macro is defined to a number identifying the Apple Clang version if the current translation unit is compiled with the Apple Clang compiler. More...
|
|
#define | Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__) |
| This macro is defined to a number identifying the GCC version if the current translation unit is compiled with the GCC compiler. More...
|
|
#define | Vc_MSVC _MSC_FULL_VER |
| This macro is defined to a number identifying the Microsoft Visual C++ version if the current translation unit is compiled with the Visual C++ (MSVC) compiler. More...
|
|
The list of available instructions is not easily described by a linear list of instruction sets.
On x86 the following instruction sets always include their predecessors: SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2
But there are additional instructions that are not necessarily required by this list. These are covered in this enum.
Enumerator |
---|
Float16cInstructions | Support for float16 conversions in hardware.
|
Fma4Instructions | Support for FMA4 instructions.
|
XopInstructions | Support for XOP instructions.
|
PopcntInstructions | Support for the population count instruction.
|
Sse4aInstructions | Support for SSE4a instructions.
|
FmaInstructions | Support for FMA instructions (3 operand variant)
|
VexInstructions | Support for ternary instruction coding (VEX)
|
Bmi2Instructions | Support for BMI2 instructions.
|
Definition at line 514 of file global.h.
bool Vc::currentImplementationSupported |
( |
| ) |
|
|
inline |
Tests that the CPU and Operating System support the vector unit which was compiled for.
This function should be called before any other Vc functionality is used. It checks whether the program will work. If this function returns false
then the program should exit with a useful error message before the OS has to kill it because of an invalid instruction exception.
If the program continues and makes use of any vector features not supported by hard- or software then the program will crash.
Example:
int main()
{
  if (!Vc::currentImplementationSupported()) {
    std::cerr << "CPU or OS requirements not met for the compiled in vector unit!\n";
    exit(-1);
  }
  ...
}
bool currentImplementationSupported()
Tests that the CPU and Operating System support the vector unit which was compiled for.
- Returns
true
if the OS and hardware support execution of the currently selected SIMD instructions.
-
false
otherwise
Definition at line 148 of file support.h.
std::ostream& Vc::Common::operator<< |
( |
std::ostream & |
s, |
|
|
const Vc::MemoryBase< V, Parent, Dimension, RM > & |
m |
|
) |
| |
|
inline |
Prints the contents of a Memory object into a stream object.
Example:
Vc::Memory<int_v, 10> m;
for (int i = 0; i < m.entriesCount(); ++i) {
  m[i] = i;
}
std::cout << m << std::endl;
A helper class for fixed-size two-dimensional arrays.
static constexpr size_t entriesCount()
will output (with SSE):
{[0, 1, 2, 3] [4, 5, 6, 7] [8, 9, 0, 0]}
- Parameters
-
s | Any standard C++ ostream object. For example std::cout or a std::stringstream object. |
m | Any Vc::Memory object. |
- Returns
- The ostream object: to chain multiple stream operations.
- Note
- With the GNU standard library this function will check whether the output stream is a tty in which case it colorizes the output.
- Warning
- Please do not forget that printing a large memory object can take a long time.
UnaryFunction Vc::simd_for_each |
( |
InputIt |
first, |
|
|
InputIt |
last, |
|
|
UnaryFunction |
f |
|
) |
| |
Vc variant of the std::for_each
algorithm.
This algorithm calls f
with one argument of type Vc::Vector<
iterator value type ,
unspecified >
as often as is needed to iterate over the complete range from first
to last
. It will try to use the best vector size (VectorAbi) to work on the largest chunks possible. To support aligned loads (and stores) and to support arbitrary range distances, the algorithm may require the use of Vc::VectorAbi
types that work on fewer elements in parallel.
The following example requires C++14 for generic lambdas. If you don't have generic lambdas available you can use a "classic" functor type with a templated call operator instead.
void scale(std::vector<double> &data, double factor) {
  Vc::simd_for_each(data.begin(), data.end(), [&](auto &v) {
    v *= factor;
  });
}
UnaryFunction simd_for_each(InputIt first, InputIt last, UnaryFunction f)
Vc variant of the std::for_each algorithm.
enable_if<is_simd_mask<Mask>::value && is_simd_vector<T>::value, T> Vc::iif |
( |
const Mask & |
condition, |
|
|
const T & |
trueValue, |
|
|
const T & |
falseValue |
|
) |
| |
|
inline delete
Function to mimic the ternary operator '?:' (inline-if).
- Parameters
-
condition | Determines which values are returned. This is analogous to the first argument to the ternary operator. |
trueValue | The values to return where condition is true . |
falseValue | The values to return where condition is false . |
- Returns
- A combination of entries from
trueValue
and falseValue
, according to condition
.
So instead of the scalar variant
float x = a > 1.f ? b : b + c;
you'd write
float_v x = Vc::iif (a > 1.f, b, b + c);
enable_if< is_simd_mask< Mask >::value &&is_simd_vector< T >::value, T > iif(const Mask &condition, const T &trueValue, const T &falseValue)
Function to mimic the ternary operator '?:' (inline-if).
Vector< float > float_v
vector of single precision
Assuming a has the values [0, 3, 5, 1], b is [1, 1, 1, 1], and c is [1, 2, 3, 4], then x will be [2, 1, 1, 5] (b where a > 1.f holds, b + c elsewhere).
Definition at line 60 of file iif.h.
std::pair<V, V> Vc::interleave |
( |
const V & |
a, |
|
|
const V & |
b |
|
) |
| |
Interleaves the entries from a
and b
into two vectors of the same type.
The two returned vectors, read in order, contain the elements a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3], ...
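Example:
Vc::SimdArray<int, 4> a = { 1, 2, 3, 4 };
Vc::SimdArray<int, 4> b = { 9, 10, 11, 12 };
std::tie(a, b) = Vc::interleave(a, b);
std::cout << a << b;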
Data-parallel arithmetic type with user-defined number of elements.
std::pair< V, V > interleave(const V &a, const V &b)
Interleaves the entries from a and b into two vectors of the same type.
- Parameters
-
a | input vector whose data will appear at even indexes in the output |
b | input vector whose data will appear at odd indexes in the output |
- Returns
- two vectors with data from
a
and b
interleaved
Definition at line 55 of file interleave.h.
constexpr WhereImpl::WhereMask<M> Vc::where |
( |
const M & |
mask | ) |
|
|
constexpr |
Conditional assignment.
Since compares between SIMD vectors do not return a single boolean, but rather a vector of booleans (mask), one often cannot use if / else statements. Instead, one needs to state that only a subset of entries of a given SIMD vector should be modified. The where
function can be prepended to any assignment operation to execute a masked assignment.
- Parameters
-
mask | The mask that selects the entries in the target vector that will be modified. |
- Returns
- This function returns an opaque object that binds to the left operand of an assignment via the binary-or operator or the functor operator. (i.e. either
where(mask) | x = y
or where(mask)(x) = y
)
Example:
template<typename T> void f1(T &x, T &y)
{
if (x < 2) {
x *= y;
y += 2;
}
}
template<typename T> void f2(T &x, T &y)
{
  where(x < 2) | x *= y;
  where(x < 2) | y += 2;
}
constexpr WhereImpl::WhereMask< M > where(const M &mask)
Conditional assignment.
The block following the if statement in f1
will be executed if x < 2
evaluates to true
. If T
is a scalar type you normally get what you expect. But if T
is a SIMD vector type, the comparison will use the implicit conversion from a mask to bool, meaning all_of(x < 2)
.
Most of the time the required operation is a masked assignment as stated in f2
.
Definition at line 265 of file where.h.
Referenced by Vc::iif().
constexpr AlignedTag Aligned |
|
constexpr |
constexpr UnalignedTag Unaligned |
|
constexpr |
Use this object for a flags parameter to request unaligned loads and stores.
It specifies that a load/store cannot expect a memory address that is aligned on the correct boundary (i.e. the alignment is less than MemoryAlignment).
- Note
- If you specify Unaligned, but the memory address is aligned, the load/store will execute slightly slower than necessary.
Definition at line 191 of file loadstoreflags.h.
Referenced by SimdArray< T, N, V, Wt >::reversed(), SimdArray< T, N, V, Wt >::rotated(), and MemoryBase< V, Parent, Dimension, RowMemory >::vector().
constexpr StreamingTag Streaming |
|
constexpr |
Use this object for a flags
parameter to request streaming loads and stores.
It specifies that the cache should be bypassed for the given load/store. Whether this will actually be done depends on the target system's capabilities.
Streaming stores can be interesting when the code calculates values that, after being written to memory, will not be used for a long time or used by a different thread.
- Note
- Expect that most target systems do not support unaligned streaming loads or stores. Therefore, make sure that you also specify Aligned.
Definition at line 206 of file loadstoreflags.h.