28 #ifndef VC_COMMON_SIMDARRAY_H_ 29 #define VC_COMMON_SIMDARRAY_H_ 37 #include "writemaskedvector.h" 38 #include "simdarrayhelper.h" 39 #include "simdmaskarray.h" 41 #include "interleave.h" 42 #include "indexsequence.h" 43 #include "transpose.h" 46 namespace Vc_VERSIONED_NAMESPACE
57 template <std::size_t N,
class... Candidates>
struct select_best_vector_type_impl;
59 template <std::
size_t N,
class T>
struct select_best_vector_type_impl<N, T> {
63 template <std::size_t N,
class T,
class... Candidates>
64 struct select_best_vector_type_impl<N, T, Candidates...> {
65 using type =
typename std::conditional<
66 (N < T::Size),
typename select_best_vector_type_impl<N, Candidates...>::type,
69 template <
class T, std::
size_t N>
70 struct select_best_vector_type : select_best_vector_type_impl<N,
73 #elif defined Vc_IMPL_AVX
79 Vc::Scalar::Vector<T>> {
87 template <
typename T> T Vc_INTRINSIC Vc_PURE product_helper_(
const T &l,
const T &r) {
return l * r; }
88 template <
typename T> T Vc_INTRINSIC Vc_PURE sum_helper_(
const T &l,
const T &r) {
return l + r; }
92 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
95 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
104 #define Vc_CURRENT_CLASS_NAME SimdArray 114 template <
typename T, std::
size_t N,
typename VectorType_>
117 static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
118 std::is_same<T, int32_t>::value ||
119 std::is_same<T, uint32_t>::value ||
120 std::is_same<T, int16_t>::value ||
121 std::is_same<T, uint16_t>::value,
122 "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, " 123 "int16_t, uint16_t }");
125 std::is_same<VectorType_,
126 typename Common::select_best_vector_type<T, N>::type>::value &&
127 VectorType_::size() == N,
128 "ERROR: leave the third and fourth template parameters with their defaults. They " 129 "are implementation details.");
132 static constexpr
bool is_atomic =
true;
133 using VectorType = VectorType_;
134 using vector_type = VectorType;
135 using storage_type = vector_type;
136 using vectorentry_type =
typename vector_type::VectorEntryType;
140 static constexpr std::size_t size() {
return N; }
144 using VectorEntryType = vectorentry_type;
148 using reference = Detail::ElementReference<SimdArray>;
149 static constexpr std::size_t Size = size();
163 Vc_INTRINSIC SimdArray(
value_type &&a) : data(a) {}
166 typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
167 Vc_INTRINSIC SimdArray(U a)
168 : SimdArray(static_cast<value_type>(a))
173 template <
class U,
class V,
class = enable_if<N == V::Size>>
175 : data(simd_cast<vector_type>(internal_data(x)))
178 template <
class U,
class V,
class = enable_if<(N > V::Size && N <= 2 * V::Size)>,
181 : data(simd_cast<vector_type>(internal_data(internal_data0(x)),
182 internal_data(internal_data1(x))))
185 template <
class U,
class V,
class = enable_if<(N > 2 * V::Size && N <= 4 * V::Size)>,
186 class = U,
class = U>
188 : data(simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))),
189 internal_data(internal_data1(internal_data0(x))),
190 internal_data(internal_data0(internal_data1(x))),
191 internal_data(internal_data1(internal_data1(x)))))
195 template <
typename V, std::
size_t Pieces, std::
size_t Index>
196 Vc_INTRINSIC SimdArray(Common::Segment<V, Pieces, Index> &&x)
197 : data(simd_cast<vector_type, Index>(x.data))
201 Vc_INTRINSIC SimdArray(
const std::initializer_list<value_type> &init)
204 Vc_ASSERT(init.size() == size());
211 Vc_INTRINSIC SimdArray(
const V &x)
212 : data(simd_cast<vector_type>(x))
218 template <
typename U,
typename A,
221 !std::is_same<A, simd_abi::fixed_size<N>>::value>>
235 #include "gatherinterface.h" 236 #include "scatterinterface.h" 238 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data() {}
239 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data(o) {}
240 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i) : data(i)
243 template <std::
size_t Offset>
244 explicit Vc_INTRINSIC SimdArray(
245 Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
251 Vc_INTRINSIC
void setZero() { data.setZero(); }
252 Vc_INTRINSIC
void setZero(
mask_type k) { data.setZero(internal_data(k)); }
253 Vc_INTRINSIC
void setZeroInverted() { data.setZeroInverted(); }
254 Vc_INTRINSIC
void setZeroInverted(
mask_type k) { data.setZeroInverted(internal_data(k)); }
256 Vc_INTRINSIC
void setQnan() { data.setQnan(); }
257 Vc_INTRINSIC
void setQnan(
mask_type m) { data.setQnan(internal_data(m)); }
260 template <
typename Op,
typename... Args>
264 Common::unpackArgumentsAuto(op, r.data, std::forward<Args>(args)...);
268 template <
typename Op,
typename... Args>
269 static Vc_INTRINSIC
void callOperation(Op op, Args &&... args)
271 Common::unpackArgumentsAuto(op,
nullptr, std::forward<Args>(args)...);
288 return fromOperation(Common::Operations::random());
293 class = enable_if<std::is_arithmetic<U>::value &&
294 Traits::is_load_store_flag<Flags>::value>>
295 explicit Vc_INTRINSIC SimdArray(
const U *mem, Flags f = Flags()) : data(mem, f)
299 template <
typename... Args> Vc_INTRINSIC
void load(Args &&... args)
301 data.load(std::forward<Args>(args)...);
304 template <
typename... Args> Vc_INTRINSIC
void store(Args &&... args)
const 306 data.store(std::forward<Args>(args)...);
311 return {private_init, !data};
316 return {private_init, -data};
324 return {private_init, ~data};
327 template <
typename U,
328 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
331 return {private_init, data << x};
333 template <
typename U,
334 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
340 template <
typename U,
341 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
344 return {private_init, data >> x};
346 template <
typename U,
347 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
354 #define Vc_BINARY_OPERATOR_(op) \ 355 Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs) \ 357 data op## = rhs.data; \ 360 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
361 Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
362 Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
363 #undef Vc_BINARY_OPERATOR_ 366 Vc_DEPRECATED(
"use isnegative(x) instead") Vc_INTRINSIC
MaskType isNegative()
const 373 Vc_INTRINSIC
static value_type get(
const SimdArray &o,
int i) noexcept
377 template <
typename U>
378 Vc_INTRINSIC
static void set(SimdArray &o,
int i, U &&v) noexcept(
379 noexcept(std::declval<value_type &>() = v))
391 Vc_INTRINSIC reference operator[](
size_t i) noexcept
393 static_assert(noexcept(reference{std::declval<SimdArray &>(),
int()}),
"");
394 return {*
this, int(i)};
396 Vc_INTRINSIC
value_type operator[](
size_t i)
const noexcept
398 return get(*
this, int(i));
401 Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
const mask_type &k)
408 data.assign(v.data, internal_data(k));
412 #define Vc_REDUCTION_FUNCTION_(name_) \ 413 Vc_INTRINSIC Vc_PURE value_type name_() const { return data.name_(); } \ 414 Vc_INTRINSIC Vc_PURE value_type name_(mask_type mask) const \ 416 return data.name_(internal_data(mask)); \ 418 Vc_NOTHING_EXPECTING_SEMICOLON 419 Vc_REDUCTION_FUNCTION_(
min);
420 Vc_REDUCTION_FUNCTION_(
max);
421 Vc_REDUCTION_FUNCTION_(product);
422 Vc_REDUCTION_FUNCTION_(sum);
423 #undef Vc_REDUCTION_FUNCTION_ 431 return {private_init, data.
apply(std::forward<F>(f))};
435 return {private_init, data.
apply(std::forward<F>(f), k)};
440 return {private_init, data.shifted(amount)};
443 template <std::
size_t NN>
447 return {private_init, data.shifted(amount, simd_cast<VectorType>(shiftIn))};
452 return {private_init, data.
rotated(amount)};
458 return {private_init,
exponent(data)};
463 return {private_init, data.interleaveLow(x.data)};
467 return {private_init, data.interleaveHigh(x.data)};
472 return {private_init, data.
reversed()};
477 return {private_init, data.
sorted()};
480 template <
class G,
class = decltype(std::declval<G>()(std::
size_t())),
481 class = enable_if<!Traits::is_simd_vector<G>::value>>
482 Vc_INTRINSIC SimdArray(
const G &gen) : data(gen)
487 return {private_init, VectorType::generate(gen)};
490 Vc_DEPRECATED(
"use copysign(x, y) instead")
496 friend VectorType &internal_data<>(SimdArray &x);
497 friend const VectorType &internal_data<>(
const SimdArray &x);
500 Vc_INTRINSIC SimdArray(private_init_t, VectorType &&x) : data(std::move(x)) {}
502 Vc_FREE_STORE_OPERATORS_ALIGNED(
alignof(storage_type));
508 alignas(
static_cast<std::size_t
>(
509 Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value *
sizeof(VectorType_) /
510 VectorType_::size()>::value)) storage_type data;
513 template <
typename T, std::
size_t N,
typename VectorType>
515 template <
typename T, std::
size_t N,
typename VectorType>
523 template <
typename T, std::
size_t N,
typename VectorType>
533 template <
class T> Vc_INTRINSIC T unwrap(
const T &x) {
return x; }
535 template <
class T,
size_t N,
class V>
538 return internal_data(x);
541 template <
class T,
size_t Pieces,
size_t Index>
542 Vc_INTRINSIC
auto unwrap(
const Common::Segment<T, Pieces, Index> &x)
543 -> decltype(x.to_fixed_size())
545 return unwrap(x.to_fixed_size());
549 template <
typename T, std::
size_t N,
typename VectorType>
550 template <
class MT,
class IT,
int Scale>
552 const Common::GatherArguments<MT, IT, Scale> &args)
554 data.
gather(Common::make_gather<Scale>(args.address, unwrap(args.indexes)));
556 template <
typename T, std::
size_t N,
typename VectorType>
557 template <
class MT,
class IT,
int Scale>
559 const Common::GatherArguments<MT, IT, Scale> &args,
MaskArgument mask)
561 data.
gather(Common::make_gather<Scale>(args.address, unwrap(args.indexes)),
566 template <
typename T, std::
size_t N,
typename VectorType>
567 template <
typename MT,
typename IT>
571 data.
scatter(mem, unwrap(std::forward<IT>(indexes)));
573 template <
typename T, std::
size_t N,
typename VectorType>
574 template <
typename MT,
typename IT>
579 data.
scatter(mem, unwrap(std::forward<IT>(indexes)), mask);
615 template <
typename T,
size_t N,
typename V,
size_t Wt>
class SimdArray
617 static_assert(std::is_same<T, double>::value ||
618 std::is_same<T, float>::value ||
619 std::is_same<T, int32_t>::value ||
620 std::is_same<T, uint32_t>::value ||
621 std::is_same<T, int16_t>::value ||
622 std::is_same<T, uint16_t>::value,
"SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }");
624 std::is_same<V,
typename Common::select_best_vector_type<T, N>::type>::value &&
626 "ERROR: leave the third and fourth template parameters with their defaults. They " 627 "are implementation details.");
630 std::is_same<typename V::EntryType, typename V::VectorEntryType>::value ||
632 (N % V::size() == 0),
633 "SimdArray<(un)signed short, N> on MIC only works correctly for N = k * " 634 "MIC::(u)short_v::size(), i.e. k * 16.");
636 using my_traits = SimdArrayTraits<T, N>;
637 static constexpr std::size_t N0 = my_traits::N0;
638 static constexpr std::size_t N1 = my_traits::N1;
639 using Split = Common::Split<N0>;
640 template <
typename U, std::
size_t K>
using CArray = U[K];
643 static constexpr
bool is_atomic =
false;
646 static_assert(storage_type0::size() == N0,
"");
651 using vector_type = V;
652 using vectorentry_type =
typename storage_type0::vectorentry_type;
653 typedef vectorentry_type alias_type Vc_MAY_ALIAS;
674 static constexpr std::size_t
size() {
return N; }
681 using VectorEntryType = vectorentry_type;
686 using AsArg =
const SimdArray &;
688 using reference = Detail::ElementReference<SimdArray>;
691 static constexpr std::size_t MemoryAlignment =
720 return fromOperation(Common::Operations::random());
723 template <
class G,
class = decltype(std::declval<G>()(std::
size_t())),
724 class = enable_if<!Traits::is_simd_vector<G>::value>>
725 Vc_INTRINSIC SimdArray(
const G &gen)
726 : data0(gen), data1([&](std::size_t i) {
return gen(i + storage_type0::size()); })
733 auto tmp = storage_type0::generate(gen);
738 return {std::move(tmp),
739 storage_type1::generate([&](std::size_t i) {
return gen(i + N0); })};
747 SimdArray() =
default;
757 typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
759 : SimdArray(static_cast<value_type>(a))
765 SimdArray(
const SimdArray &) =
default;
766 SimdArray(SimdArray &&) =
default;
767 SimdArray &operator=(
const SimdArray &) =
default;
771 typename = enable_if<std::is_arithmetic<U>::value &&
772 Traits::is_load_store_flag<Flags>::value>>
773 explicit Vc_INTRINSIC SimdArray(
const U *mem, Flags f = Flags())
774 : data0(mem, f), data1(mem + storage_type0::size(), f)
786 template <
typename U, std::size_t Extent,
typename Flags =
DefaultLoadTag,
787 typename = enable_if<std::is_arithmetic<U>::value &&
788 Traits::is_load_store_flag<Flags>::value>>
789 explicit Vc_INTRINSIC SimdArray(CArray<U, Extent> &mem, Flags f = Flags())
790 : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
796 template <
typename U, std::size_t Extent,
typename Flags =
DefaultLoadTag,
797 typename = enable_if<std::is_arithmetic<U>::value &&
798 Traits::is_load_store_flag<Flags>::value>>
799 explicit Vc_INTRINSIC SimdArray(
const CArray<U, Extent> &mem, Flags f = Flags())
800 : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
806 Vc_INTRINSIC SimdArray(
const std::initializer_list<value_type> &init)
810 Vc_ASSERT(init.size() == size());
813 #include "gatherinterface.h" 814 #include "scatterinterface.h" 816 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data0(), data1() {}
817 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data0(o), data1(o) {}
818 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i)
820 , data1(Common::AddOffset<VectorSpecialInitializerIndexesFromZero,
821 storage_type0::size()>())
824 template <
size_t Offset>
825 explicit Vc_INTRINSIC SimdArray(
826 Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset> i)
828 , data1(Common::AddOffset<VectorSpecialInitializerIndexesFromZero,
829 storage_type0::size() + Offset>())
834 template <
class W,
class = enable_if<
837 !(std::is_convertible<Traits::entry_type_of<W>, T>::value &&
839 Vc_INTRINSIC
explicit SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
844 template <
class W,
class = enable_if<
846 Traits::simd_vector_size<W>::value == N &&
847 std::is_convertible<Traits::entry_type_of<W>, T>::value)>,
849 Vc_INTRINSIC SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
853 template <
class W, std::
size_t Pieces, std::
size_t Index>
854 Vc_INTRINSIC SimdArray(Common::Segment<W, Pieces, Index> &&x)
855 : data0(Common::Segment<W, 2 * Pieces, 2 * Index>{x.data})
856 , data1(Common::Segment<W, 2 * Pieces, 2 * Index + 1>{x.data})
862 template <
typename U,
typename A,
865 !std::is_same<A, simd_abi::fixed_size<N>>::value>>
882 Vc_INTRINSIC
void setZero()
887 Vc_INTRINSIC
void setZero(
const mask_type &k)
889 data0.setZero(Split::lo(k));
890 data1.setZero(Split::hi(k));
892 Vc_INTRINSIC
void setZeroInverted()
894 data0.setZeroInverted();
895 data1.setZeroInverted();
897 Vc_INTRINSIC
void setZeroInverted(
const mask_type &k)
899 data0.setZeroInverted(Split::lo(k));
900 data1.setZeroInverted(Split::hi(k));
904 Vc_INTRINSIC
void setQnan() {
908 Vc_INTRINSIC
void setQnan(
const mask_type &m) {
909 data0.setQnan(Split::lo(m));
910 data1.setQnan(Split::hi(m));
914 template <
typename Op,
typename... Args>
918 storage_type0::fromOperation(op, Split::lo(args)...),
921 storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
926 template <
typename Op,
typename... Args>
927 static Vc_INTRINSIC
void callOperation(Op op, Args &&... args)
929 storage_type0::callOperation(op, Split::lo(args)...);
930 storage_type1::callOperation(op, Split::hi(std::forward<Args>(args))...);
934 template <
typename U,
typename... Args> Vc_INTRINSIC
void load(
const U *mem, Args &&... args)
936 data0.load(mem, Split::lo(args)...);
938 data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
941 template <
typename U,
typename... Args> Vc_INTRINSIC
void store(U *mem, Args &&... args)
const 943 data0.store(mem, Split::lo(args)...);
945 data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
950 return {!data0, !data1};
955 return {-data0, -data1};
963 return {~data0, ~data1};
967 template <
typename U,
968 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
971 return {data0 << x, data1 << x};
973 template <
typename U,
974 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
981 template <
typename U,
982 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
985 return {data0 >> x, data1 >> x};
987 template <
typename U,
988 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
997 #define Vc_BINARY_OPERATOR_(op) \ 998 Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs) \ 1000 data0 op## = rhs.data0; \ 1001 data1 op## = rhs.data1; \ 1004 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
1005 Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
1006 Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
1007 #undef Vc_BINARY_OPERATOR_ 1015 Vc_INTRINSIC
static value_type get(
const SimdArray &o,
int i) noexcept
1017 return reinterpret_cast<const alias_type *
>(&o)[i];
1019 template <
typename U>
1020 Vc_INTRINSIC
static void set(SimdArray &o,
int i, U &&v) noexcept(
1021 noexcept(std::declval<value_type &>() = v))
1023 reinterpret_cast<alias_type *
>(&o)[i] = v;
1036 static_assert(noexcept(reference{std::declval<SimdArray &>(),
int()}),
"");
1037 return {*
this, int(i)};
1043 return get(*
this, int(index));
1049 Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type>
operator()(
1052 return {*
this, mask};
1058 data0.assign(v.data0, internal_data0(k));
1059 data1.assign(v.data1, internal_data1(k));
1063 #define Vc_REDUCTION_FUNCTION_(name_, binary_fun_, scalar_fun_) \ 1065 template <typename ForSfinae = void> \ 1066 Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \ 1067 storage_type0::Size == storage_type1::Size, \ 1068 value_type> name_##_impl() const \ 1070 return binary_fun_(data0, data1).name_(); \ 1073 template <typename ForSfinae = void> \ 1074 Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \ 1075 storage_type0::Size != storage_type1::Size, \ 1076 value_type> name_##_impl() const \ 1078 return scalar_fun_(data0.name_(), data1.name_()); \ 1083 Vc_INTRINSIC value_type name_() const { return name_##_impl(); } \ 1085 Vc_INTRINSIC value_type name_(const mask_type &mask) const \ 1087 if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) { \ 1088 return data1.name_(Split::hi(mask)); \ 1089 } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) { \ 1090 return data0.name_(Split::lo(mask)); \ 1092 return scalar_fun_(data0.name_(Split::lo(mask)), \ 1093 data1.name_(Split::hi(mask))); \ 1096 Vc_NOTHING_EXPECTING_SEMICOLON 1099 Vc_REDUCTION_FUNCTION_(product, internal::product_helper_, internal::product_helper_);
1100 Vc_REDUCTION_FUNCTION_(sum, internal::sum_helper_, internal::sum_helper_);
1101 #undef Vc_REDUCTION_FUNCTION_ 1107 tmp[0] += ps0[data0.size() - 1];
1115 return {data0.apply(f), data1.apply(f)};
1118 template <
typename F>
1121 return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))};
1128 constexpr
int SSize = Size;
1129 constexpr
int SSize0 = storage_type0::Size;
1130 constexpr
int SSize1 = storage_type1::Size;
1135 if (amount > -SSize0) {
1136 return {data0.shifted(amount), data1.shifted(amount, data0)};
1138 if (amount == -SSize0) {
1139 return {storage_type0(0), simd_cast<storage_type1>(data0)};
1141 if (amount < -SSize0) {
1142 return {storage_type0(0), simd_cast<storage_type1>(data0.shifted(
1147 if (amount >= SSize) {
1149 }
else if (amount >= SSize0) {
1151 simd_cast<storage_type0>(data1).
shifted(amount - SSize0),
1153 }
else if (amount >= SSize1) {
1154 return {data0.shifted(amount, data1), storage_type1(0)};
1156 return {data0.shifted(amount, data1), data1.shifted(amount)};
1161 template <std::
size_t NN>
1163 !(std::is_same<storage_type0, storage_type1>::value &&
1168 constexpr
int SSize = Size;
1173 return operator[](i);
1174 }
else if (i >= -SSize) {
1175 return shiftIn[i + SSize];
1183 return operator[](i);
1184 }
else if (i < 2 * SSize) {
1185 return shiftIn[i - SSize];
1194 template <std::
size_t NN>
struct bisectable_shift
1195 :
public std::integral_constant<bool,
1196 std::is_same<storage_type0, storage_type1>::value &&
1202 template <std::
size_t NN>
1204 enable_if<bisectable_shift<NN>::value,
int> amount,
1207 constexpr
int SSize = Size;
1209 if (amount > -static_cast<int>(storage_type0::Size)) {
1210 return {data0.shifted(amount, internal_data1(shiftIn)),
1211 data1.shifted(amount, data0)};
1213 if (amount == -static_cast<int>(storage_type0::Size)) {
1214 return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
1216 if (amount > -SSize) {
1218 internal_data1(shiftIn)
1219 .shifted(amount + static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
1220 data0.shifted(amount + static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
1222 if (amount == -SSize) {
1225 if (amount > -2 * SSize) {
1226 return shiftIn.
shifted(amount + SSize);
1232 if (amount < static_cast<int>(storage_type0::Size)) {
1233 return {data0.shifted(amount, data1),
1234 data1.shifted(amount, internal_data0(shiftIn))};
1236 if (amount == static_cast<int>(storage_type0::Size)) {
1237 return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
1239 if (amount < SSize) {
1240 return {data1.shifted(amount - static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
1241 internal_data0(shiftIn)
1242 .shifted(amount - static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
1244 if (amount == SSize) {
1247 if (amount < 2 * SSize) {
1248 return shiftIn.
shifted(amount - SSize);
1257 amount %= int(size());
1260 }
else if (amount < 0) {
1277 auto &&d0cvtd = simd_cast<storage_type1>(data0);
1278 auto &&d1cvtd = simd_cast<storage_type0>(data1);
1279 constexpr
int size0 = storage_type0::size();
1280 constexpr
int size1 = storage_type1::size();
1282 if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
1283 return {std::move(d1cvtd), std::move(d0cvtd)};
1284 }
else if (amount < size1) {
1285 return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
1286 }
else if (amount == size1) {
1287 return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
1288 }
else if (
int(size()) - amount < size1) {
1289 return {data0.shifted(amount -
int(size()), d1cvtd.shifted(size1 - size0)),
1290 data1.shifted(amount -
int(size()), data0.shifted(size0 - size1))};
1291 }
else if (
int(size()) - amount == size1) {
1292 return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
1293 simd_cast<storage_type1>(data0.shifted(size0 - size1))};
1294 }
else if (amount <= size0) {
1295 return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1296 simd_cast<storage_type1>(data0.shifted(amount - size1))};
1298 return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1299 simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
1310 return {data0.interleaveLow(x.data0),
1311 simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
1316 return interleaveHighImpl(
1318 std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
1325 return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
1330 return {data0.interleaveHigh(x.data0)
1332 simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
1333 data1.interleaveHigh(x.data1)};
1340 if (std::is_same<storage_type0, storage_type1>::value) {
1341 return {simd_cast<storage_type0>(data1).reversed(),
1342 simd_cast<storage_type1>(data0).reversed()};
1353 return {data0.shifted(storage_type1::Size, data1).reversed(),
1354 simd_cast<storage_type1>(data0.reversed().shifted(
1355 storage_type0::Size - storage_type1::Size))};
1363 std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
1369 #ifdef Vc_DEBUG_SORTED 1370 std::cerr <<
"-- " << data0 << data1 <<
'\n';
1372 const auto a = data0.
sorted();
1374 const auto lo =
Vc::min(a, b);
1375 const auto hi =
Vc::max(a, b);
1376 return {lo.sorted(), hi.sorted()};
1382 using SortableArray =
1384 auto sortable = simd_cast<SortableArray>(*this);
1385 for (std::size_t i = Size; i < SortableArray::Size; ++i) {
1386 using limits = std::numeric_limits<value_type>;
1387 if (limits::has_infinity) {
1388 sortable[i] = limits::infinity();
1427 static constexpr std::size_t Size = size();
1430 Vc_DEPRECATED(
"use exponent(x) instead")
1433 return {exponent(data0), exponent(data1)};
1437 Vc_DEPRECATED(
"use isnegative(x) instead") Vc_INTRINSIC
MaskType isNegative()
const 1443 Vc_DEPRECATED(
"use copysign(x, y) instead")
1452 friend storage_type0 &internal_data0<>(SimdArray &x);
1453 friend storage_type1 &internal_data1<>(SimdArray &x);
1454 friend const storage_type0 &internal_data0<>(
const SimdArray &x);
1455 friend const storage_type1 &internal_data1<>(
const SimdArray &x);
1458 Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y)
1459 : data0(std::move(x)), data1(std::move(y))
1463 Vc_FREE_STORE_OPERATORS_ALIGNED(
alignof(storage_type0));
1469 alignas(
static_cast<std::size_t
>(
1470 Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value *
sizeof(V) /
1471 V::size()>::value)) storage_type0 data0;
1472 storage_type1 data1;
1474 #undef Vc_CURRENT_CLASS_NAME 1475 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1477 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1481 template <
typename T, std::
size_t N,
typename VectorType, std::
size_t M>
1482 template <
class MT,
class IT,
int Scale>
1484 const Common::GatherArguments<MT, IT, Scale> &args)
1486 data0.
gather(Common::make_gather<Scale>(
1487 args.address, Split::lo(Common::Operations::gather(), args.indexes)));
1488 data1.gather(Common::make_gather<Scale>(
1489 args.address, Split::hi(Common::Operations::gather(), args.indexes)));
1491 template <
typename T, std::
size_t N,
typename VectorType, std::
size_t M>
1492 template <
class MT,
class IT,
int Scale>
1494 const Common::GatherArguments<MT, IT, Scale> &args,
MaskArgument mask)
1496 data0.
gather(Common::make_gather<Scale>(
1497 args.address, Split::lo(Common::Operations::gather(), args.indexes)),
1499 data1.gather(Common::make_gather<Scale>(
1500 args.address, Split::hi(Common::Operations::gather(), args.indexes)),
1505 template <
typename T, std::
size_t N,
typename VectorType, std::
size_t M>
1506 template <
typename MT,
typename IT>
1510 data0.
scatter(mem, Split::lo(Common::Operations::gather(),
1513 data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
1515 template <
typename T, std::
size_t N,
typename VectorType, std::
size_t M>
1516 template <
typename MT,
typename IT>
1520 data0.
scatter(mem, Split::lo(Common::Operations::gather(), indexes),
1523 data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
1529 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1539 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1549 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1559 template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1572 #if defined Vc_MSVC && defined Vc_IMPL_SSE && !defined Vc_IMPL_AVX 1576 : data0(x), data1(0)
1585 #define Vc_FIXED_OP(op) \ 1586 template <class T, int N, \ 1587 class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type> \ 1588 fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a, \ 1589 const fixed_size_simd<T, N> &b) \ 1591 return {private_init, internal_data(a) op internal_data(b)}; \ 1593 template <class T, int N, \ 1594 class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type, \ 1596 fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a, \ 1597 const fixed_size_simd<T, N> &b) \ 1599 return {internal_data0(a) op internal_data0(b), \ 1600 internal_data1(a) op internal_data1(b)}; \ 1602 Vc_ALL_ARITHMETICS(Vc_FIXED_OP);
1603 Vc_ALL_BINARY(Vc_FIXED_OP);
1604 Vc_ALL_SHIFTS(Vc_FIXED_OP);
1606 #define Vc_FIXED_OP(op) \ 1607 template <class T, int N, \ 1608 class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type> \ 1609 fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a, \ 1610 const fixed_size_simd<T, N> &b) \ 1612 return {private_init, internal_data(a) op internal_data(b)}; \ 1614 template <class T, int N, \ 1615 class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type, \ 1617 fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a, \ 1618 const fixed_size_simd<T, N> &b) \ 1620 return {internal_data0(a) op internal_data0(b), \ 1621 internal_data1(a) op internal_data1(b)}; \ 1623 Vc_ALL_COMPARES(Vc_FIXED_OP);
1629 namespace result_vector_type_internal
1631 template <
typename T>
1632 using remove_cvref =
typename std::remove_cv<typename std::remove_reference<T>::type>::type;
1634 template <
typename T>
1635 using is_integer_larger_than_int = std::integral_constant<
1636 bool, std::is_integral<T>::value &&(
sizeof(T) >
sizeof(
int) ||
1637 std::is_same<T, long>::value ||
1638 std::is_same<T, unsigned long>::value)>;
1641 typename L,
typename R,
1646 !(Traits::is_fixed_size_simd<L>::value &&
1647 Traits::is_fixed_size_simd<R>::value) &&
1648 ((std::is_arithmetic<remove_cvref<L>>::value &&
1649 !is_integer_larger_than_int<remove_cvref<L>>::value) ||
1650 (std::is_arithmetic<remove_cvref<R>>::value &&
1651 !is_integer_larger_than_int<remove_cvref<R>>::value) ||
1657 template <
typename L,
typename R, std::
size_t N>
struct evaluate<L, R, N, true>
1660 using LScalar = Traits::entry_type_of<L>;
1661 using RScalar = Traits::entry_type_of<R>;
1663 template <
bool B,
typename T,
typename F>
1664 using conditional =
typename std::conditional<B, T, F>::type;
1678 conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
1679 sizeof(LScalar) <
sizeof(
int) &&
1680 sizeof(RScalar) <
sizeof(
int)),
1681 conditional<(
sizeof(LScalar) ==
sizeof(RScalar)),
1682 conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
1683 conditional<(sizeof(LScalar) >
sizeof(RScalar)), LScalar, RScalar>>,
1684 decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
1690 template <
typename L,
typename R>
1691 using result_vector_type =
typename result_vector_type_internal::evaluate<L, R>::type;
1693 #define Vc_BINARY_OPERATORS_(op_) \ 1695 template <typename L, typename R> \ 1696 Vc_INTRINSIC result_vector_type<L, R> operator op_(L &&lhs, R &&rhs) \ 1698 using Return = result_vector_type<L, R>; \ 1699 return Vc::Detail::operator op_( \ 1700 static_cast<const Return &>(std::forward<L>(lhs)), \ 1701 static_cast<const Return &>(std::forward<R>(rhs))); \ 1720 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_);
1722 Vc_ALL_BINARY(Vc_BINARY_OPERATORS_);
1724 #undef Vc_BINARY_OPERATORS_ 1725 #define Vc_BINARY_OPERATORS_(op_) \ 1727 template <typename L, typename R> \ 1728 Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op_(L &&lhs, \ 1731 using Promote = result_vector_type<L, R>; \ 1732 return Promote(std::forward<L>(lhs)) op_ Promote(std::forward<R>(rhs)); \ 1751 Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_);
1754 #undef Vc_BINARY_OPERATORS_ 1757 #define Vc_FORWARD_UNARY_OPERATOR(name_) \ 1759 template <typename T, std::size_t N, typename V, std::size_t M> \ 1760 inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x) \ 1762 return fixed_size_simd<T, N>::fromOperation( \ 1763 Common::Operations::Forward_##name_(), x); \ 1765 template <class T, int N> \ 1766 fixed_size_simd<T, N> name_(const fixed_size_simd<T, N> &x) \ 1768 return fixed_size_simd<T, N>::fromOperation( \ 1769 Common::Operations::Forward_##name_(), x); \ 1771 Vc_NOTHING_EXPECTING_SEMICOLON 1773 #define Vc_FORWARD_UNARY_BOOL_OPERATOR(name_) \ 1775 template <typename T, std::size_t N, typename V, std::size_t M> \ 1776 inline fixed_size_simd_mask<T, N> name_(const SimdArray<T, N, V, M> &x) \ 1778 return fixed_size_simd_mask<T, N>::fromOperation( \ 1779 Common::Operations::Forward_##name_(), x); \ 1781 template <class T, int N> \ 1782 fixed_size_simd_mask<T, N> name_(const fixed_size_simd<T, N> &x) \ 1784 return fixed_size_simd_mask<T, N>::fromOperation( \ 1785 Common::Operations::Forward_##name_(), x); \ 1787 Vc_NOTHING_EXPECTING_SEMICOLON 1789 #define Vc_FORWARD_BINARY_OPERATOR(name_) \ 1791 template <typename T, std::size_t N, typename V, std::size_t M> \ 1792 inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x, \ 1793 const SimdArray<T, N, V, M> &y) \ 1795 return fixed_size_simd<T, N>::fromOperation( \ 1796 Common::Operations::Forward_##name_(), x, y); \ 1798 Vc_NOTHING_EXPECTING_SEMICOLON 1804 Vc_FORWARD_UNARY_OPERATOR(abs);
1806 Vc_FORWARD_UNARY_OPERATOR(asin);
1807 Vc_FORWARD_UNARY_OPERATOR(atan);
1809 Vc_FORWARD_UNARY_OPERATOR(ceil);
1811 Vc_FORWARD_UNARY_OPERATOR(cos);
1812 Vc_FORWARD_UNARY_OPERATOR(exp);
1813 Vc_FORWARD_UNARY_OPERATOR(exponent);
1814 Vc_FORWARD_UNARY_OPERATOR(floor);
1816 template <
typename T, std::
size_t N>
1822 Vc_FORWARD_UNARY_BOOL_OPERATOR(isfinite);
1823 Vc_FORWARD_UNARY_BOOL_OPERATOR(isinf);
1824 Vc_FORWARD_UNARY_BOOL_OPERATOR(isnan);
1825 Vc_FORWARD_UNARY_BOOL_OPERATOR(isnegative);
1827 template <
typename T, std::
size_t N>
1833 template <
typename T, std::
size_t N>
1838 Vc_FORWARD_UNARY_OPERATOR(log);
1839 Vc_FORWARD_UNARY_OPERATOR(log10);
1840 Vc_FORWARD_UNARY_OPERATOR(log2);
1841 Vc_FORWARD_UNARY_OPERATOR(reciprocal);
1842 Vc_FORWARD_UNARY_OPERATOR(round);
1843 Vc_FORWARD_UNARY_OPERATOR(rsqrt);
1844 Vc_FORWARD_UNARY_OPERATOR(sin);
1846 template <
typename T, std::
size_t N>
1851 Vc_FORWARD_UNARY_OPERATOR(sqrt);
1852 Vc_FORWARD_UNARY_OPERATOR(trunc);
1853 Vc_FORWARD_BINARY_OPERATOR(
min);
1854 Vc_FORWARD_BINARY_OPERATOR(
max);
1856 #undef Vc_FORWARD_UNARY_OPERATOR 1857 #undef Vc_FORWARD_UNARY_BOOL_OPERATOR 1858 #undef Vc_FORWARD_BINARY_OPERATOR 1862 #define Vc_DUMMY_ARG0 , int = 0 1863 #define Vc_DUMMY_ARG1 , long = 0 1864 #define Vc_DUMMY_ARG2 , short = 0 1865 #define Vc_DUMMY_ARG3 , char = '0' 1866 #define Vc_DUMMY_ARG4 , unsigned = 0u 1867 #define Vc_DUMMY_ARG5 , unsigned short = 0u 1869 #define Vc_DUMMY_ARG0 1870 #define Vc_DUMMY_ARG1 1871 #define Vc_DUMMY_ARG2 1872 #define Vc_DUMMY_ARG3 1873 #define Vc_DUMMY_ARG4 1874 #define Vc_DUMMY_ARG5 1881 template <
typename Return, std::size_t N,
typename T,
typename... From>
1882 Vc_INTRINSIC Vc_CONST enable_if<
sizeof...(From) != 0, Return>
1883 simd_cast_impl_smaller_input(
const From &... xs,
const T &last)
1885 Return r = simd_cast<Return>(xs...);
1886 for (
size_t i = 0; i < N; ++i) {
1887 r[i + N *
sizeof...(From)] = static_cast<typename Return::EntryType>(last[i]);
1891 template <
typename Return, std::
size_t N,
typename T>
1892 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(
const T &last)
1894 Return r = Return();
1895 for (
size_t i = 0; i < N; ++i) {
1896 r[i] =
static_cast<typename Return::EntryType
>(last[i]);
1900 template <
typename Return, std::size_t N,
typename T,
typename... From>
1901 Vc_INTRINSIC Vc_CONST enable_if<
sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
1902 const From &... xs,
const T &last)
1904 Return r = simd_cast<Return>(xs...);
1905 for (
size_t i = N *
sizeof...(From); i < Return::Size; ++i) {
1906 r[i] =
static_cast<typename Return::EntryType
>(last[i - N *
sizeof...(From)]);
1910 template <
typename Return, std::
size_t N,
typename T>
1911 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(
const T &last)
1913 Return r = Return();
1914 for (
size_t i = 0; i < Return::size(); ++i) {
1915 r[i] =
static_cast<typename Return::EntryType
>(last[i]);
1921 template <
typename Return,
typename T,
typename... From>
1922 Vc_INTRINSIC_L Vc_CONST_L Return
1923 simd_cast_without_last(
const From &... xs,
const T &) Vc_INTRINSIC_R Vc_CONST_R;
// Compile-time predicate: true iff all types in the pack are the same type.
template <typename... Ts> struct are_all_types_equal;
// A pack of one type is trivially all-equal.
template <typename T>
struct are_all_types_equal<T> : public std::integral_constant<bool, true>
{
};
// Recursive case: T0 must equal T1 and the remainder (starting at T1) must be
// all-equal among themselves.
template <typename T0, typename T1, typename... Ts>
struct are_all_types_equal<T0, T1, Ts...>
    : public std::integral_constant<
          bool, std::is_same<T0, T1>::value && are_all_types_equal<T1, Ts...>::value>
{
};
1958 template <
typename Return,
typename... Ts>
1959 Vc_INTRINSIC Vc_CONST Return
1960 simd_cast_interleaved_argument_order(
const Ts &... a,
const Ts &... b);
1964 template <
typename Return, std::size_t offset,
typename From,
typename... Froms>
1965 Vc_INTRINSIC Vc_CONST
1966 enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
1967 simd_cast_with_offset(
const From &x,
const Froms &... xs);
1969 template <
typename Return, std::
size_t offset,
typename From>
1970 Vc_INTRINSIC Vc_CONST
1971 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
1972 simd_cast_with_offset(
const From &x);
1974 template <
typename Return, std::
size_t offset,
typename From>
1975 Vc_INTRINSIC Vc_CONST
1976 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1978 !Traits::isAtomicSimdArray<Return>::value) ||
1980 !Traits::isAtomicSimdMaskArray<Return>::value))),
1982 simd_cast_with_offset(
const From &x);
1984 template <
typename Return, std::
size_t offset,
typename From>
1985 Vc_INTRINSIC Vc_CONST
1986 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1988 Traits::isAtomicSimdArray<Return>::value) ||
1990 Traits::isAtomicSimdMaskArray<Return>::value))),
1992 simd_cast_with_offset(
const From &x);
1994 template <
typename Return, std::size_t offset,
typename From,
typename... Froms>
1995 Vc_INTRINSIC Vc_CONST enable_if<
1996 (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
1997 simd_cast_with_offset(
const From &,
const Froms &... xs)
1999 return simd_cast_with_offset<Return, offset - From::Size>(xs...);
2003 template <
typename Return, std::
size_t offset,
typename From>
2004 Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
// first_type_of<Ts...>: yields the first type of a non-empty parameter pack.
template <typename T, typename... Ts> struct first_type_of_impl
{
    using type = T;
};
template <typename... Ts>
using first_type_of = typename first_type_of_impl<Ts...>::type;
2018 template <
typename Return,
typename From>
2019 Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
2020 template <
typename Return,
typename... Froms>
2021 Vc_INTRINSIC Vc_CONST
2022 enable_if<(are_all_types_equal<Froms...>::value &&
2023 sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
2025 simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);
2029 template <
typename Return,
typename From,
typename... Froms>
2030 Vc_INTRINSIC Vc_CONST enable_if<
2031 (are_all_types_equal<From, Froms...>::value &&
2032 (1 +
sizeof...(Froms)) * From::Size >= Return::Size &&
sizeof...(Froms) != 0),
2034 simd_cast_drop_arguments(Froms... xs, From x, From);
2035 template <
typename Return,
typename From>
2036 Vc_INTRINSIC Vc_CONST
2037 enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
2038 simd_cast_drop_arguments(From x, From);
2042 #ifdef Vc_DEBUG_SIMD_CAST 2043 void debugDoNothing(
const std::initializer_list<void *> &) {}
2044 template <
typename T0,
typename... Ts>
2045 inline void vc_debug_(
const char *prefix,
const char *suffix,
const T0 &arg0,
2048 std::cerr << prefix << arg0;
2049 debugDoNothing({&(std::cerr <<
", " << args)...});
2050 std::cerr << suffix;
2053 template <
typename T0,
typename... Ts>
2054 Vc_INTRINSIC
void vc_debug_(
const char *,
const char *,
const T0 &,
const Ts &...)
// Compile-time A < B wrapped in a type, for use inside enable_if expressions.
template <std::size_t A, std::size_t B>
struct is_less : public std::integral_constant<bool, (A < B)> {
};
// Compile-time power-of-two test via the (N-1)&N trick.
// NOTE(review): also yields true for N == 0; callers only instantiate with N >= 1.
template <std::size_t N>
struct is_power_of_2 : public std::integral_constant<bool, ((N - 1) & N) == 0> {
};
2071 #define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_) \
2072 template <typename Return, typename T, typename A, typename... Froms> \
2073 Vc_INTRINSIC Vc_CONST enable_if< \
2074 (Traits::isAtomic##SimdArrayType_<Return>::value && \
2075 is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value && \
2076 are_all_types_equal<NativeType_<T, A>, Froms...>::value && \
2077 !detail::is_fixed_size_abi<A>::value), \
2079 simd_cast(NativeType_<T, A> x, Froms... xs) \
2081 vc_debug_("simd_cast{1}(", ")\n", x, xs...); \
2082 return {private_init, simd_cast<typename Return::storage_type>(x, xs...)}; \
2084 template <typename Return, typename T, typename A, typename... Froms> \
2085 Vc_INTRINSIC Vc_CONST enable_if< \
2086 (Traits::isAtomic##SimdArrayType_<Return>::value && \
2087 !is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value && \
2088 are_all_types_equal<NativeType_<T, A>, Froms...>::value && \
2089 !detail::is_fixed_size_abi<A>::value), \
2091 simd_cast(NativeType_<T, A> x, Froms... xs) \
2093 vc_debug_("simd_cast{2}(", ")\n", x, xs...); \
2094 return {simd_cast_without_last<Return, NativeType_<T, A>, Froms...>(x, xs...)}; \
2096 template <typename Return, typename T, typename A, typename... Froms> \
2097 Vc_INTRINSIC Vc_CONST \
2098 enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2099 !Traits::isAtomic##SimdArrayType_<Return>::value && \
2100 is_less<Common::left_size<Return::Size>(), \
2101 NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value && \
2102 are_all_types_equal<NativeType_<T, A>, Froms...>::value && \
2103 !detail::is_fixed_size_abi<A>::value), \
2105 simd_cast(NativeType_<T, A> x, Froms... xs) \
2107 vc_debug_("simd_cast{3}(", ")\n", x, xs...); \
2108 using R0 = typename Return::storage_type0; \
2109 using R1 = typename Return::storage_type1; \
2110 return {simd_cast_drop_arguments<R0, Froms...>(x, xs...), \
2111 simd_cast_with_offset<R1, R0::Size>(x, xs...)}; \
2113 template <typename Return, typename T, typename A, typename... Froms> \
2114 Vc_INTRINSIC Vc_CONST \
2115 enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2116 !Traits::isAtomic##SimdArrayType_<Return>::value && \
2117 !is_less<Common::left_size<Return::Size>(), \
2118 NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value && \
2119 are_all_types_equal<NativeType_<T, A>, Froms...>::value && \
2120 !detail::is_fixed_size_abi<A>::value), \
2122 simd_cast(NativeType_<T, A> x, Froms... xs) \
2124 vc_debug_("simd_cast{4}(", ")\n", x, xs...); \
2125 using R0 = typename Return::storage_type0; \
2126 using R1 = typename Return::storage_type1; \
2127 return {simd_cast<R0>(x, xs...), R1(0)}; \
2129 Vc_NOTHING_EXPECTING_SEMICOLON
2131 Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
2132 Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
2133 #undef Vc_SIMDARRAY_CASTS
2136 #define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_) \
2138 template <typename Return, int offset, typename T, typename A> \
2139 Vc_INTRINSIC Vc_CONST \
2140 enable_if<Traits::isAtomic##SimdArrayType_<Return>::value, Return> \
2141 simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG0) \
2143 vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x); \
2144 return {private_init, simd_cast<typename Return::storage_type, offset>(x)}; \
2147 template <typename Return, int offset, typename T, typename A> \
2148 Vc_INTRINSIC Vc_CONST \
2149 enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2150 !Traits::isAtomic##SimdArrayType_<Return>::value && \
2151 Return::Size * offset + Common::left_size<Return::Size>() < \
2152 NativeType_<T, A>::Size), \
2154 simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG1) \
2156 vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x); \
2157 using R0 = typename Return::storage_type0; \
2158 constexpr int entries_offset = offset * Return::Size; \
2159 constexpr int entries_offset_right = entries_offset + R0::Size; \
2161 simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x), \
2162 simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
2167 template <typename Return, int offset, typename T, typename A> \
2168 Vc_INTRINSIC Vc_CONST \
2169 enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2170 !Traits::isAtomic##SimdArrayType_<Return>::value && \
2171 Return::Size * offset + Common::left_size<Return::Size>() >= \
2172 NativeType_<T, A>::Size), \
2174 simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG2) \
2176 vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x); \
2177 using R0 = typename Return::storage_type0; \
2178 using R1 = typename Return::storage_type1; \
2179 constexpr int entries_offset = offset * Return::Size; \
2180 return {simd_cast_with_offset<R0, entries_offset>(x), R1(0)}; \
2182 Vc_NOTHING_EXPECTING_SEMICOLON
2184 Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
2185 Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
2186 #undef Vc_SIMDARRAY_CASTS
2189 #define Vc_SIMDARRAY_CASTS(SimdArrayType_) \
2191 template <typename Return, typename T, std::size_t N, typename V, typename... From> \
2192 Vc_INTRINSIC Vc_CONST \
2193 enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value && \
2194 (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) && \
2195 !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value), \
2197 simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs) \
2199 vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...); \
2200 return simd_cast<Return>(internal_data(x0), internal_data(xs)...); \
2203 template <typename Return, typename T, std::size_t N, typename V, typename... From> \
2204 Vc_INTRINSIC Vc_CONST \
2205 enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value && \
2206 (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) && \
2207 !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value), \
2209 simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs) \
2211 vc_debug_(
"simd_cast{indivisible2}(",
")\n", x0, xs...); \
2212 return simd_cast_without_last<Return, \
2213 typename SimdArrayType_<T, N, V, N>::storage_type, \
2214 typename From::storage_type...>( \
2215 internal_data(x0), internal_data(xs)...); \
2218 template <
typename Return,
typename T, std::size_t N,
typename V, std::size_t M, \
2220 Vc_INTRINSIC Vc_CONST enable_if< \
2221 (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2222 !std::is_same<Return, SimdArrayType_<T, N, V, M>>::value && \
2223 is_less<N *
sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
2225 simd_cast(
const SimdArrayType_<T, N, V, M> &x0,
const From &... xs) \
2227 vc_debug_(
"simd_cast{bisectable}(",
")\n", x0, xs...); \
2228 return simd_cast_interleaved_argument_order< \
2229 Return,
typename SimdArrayType_<T, N, V, M>::storage_type0, \
2230 typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)..., \
2231 internal_data1(x0), internal_data1(xs)...); \
2235 template <
typename Return,
typename T, std::size_t N,
typename V, std::size_t M, \
2237 Vc_INTRINSIC Vc_CONST enable_if< \
2238 (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2239 !is_less<N *
sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
2241 simd_cast(
const SimdArrayType_<T, N, V, M> &x0,
const From &... xs) \
2243 vc_debug_(
"simd_cast{bisectable2}(",
")\n", x0, xs...); \
2244 return simd_cast_without_last<Return, SimdArrayType_<T, N, V, M>, From...>( \
2248 template <
typename Return,
typename T, std::size_t N,
typename V, std::size_t M, \
2250 Vc_INTRINSIC Vc_CONST enable_if< \
2251 (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2252 N * (1 +
sizeof...(From)) <= Return::Size && !is_power_of_2<N>::value), \
2254 simd_cast(
const SimdArrayType_<T, N, V, M> &x0,
const From &... xs) \
2256 vc_debug_(
"simd_cast{remaining}(",
")\n", x0, xs...); \
2257 return simd_cast_impl_smaller_input<Return, N, SimdArrayType_<T, N, V, M>, \
2258 From...>(x0, xs...); \
2261 template <
typename Return,
typename T, std::size_t N,
typename V, std::size_t M, \
2263 Vc_INTRINSIC Vc_CONST enable_if< \
2264 (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2265 N * (1 +
sizeof...(From)) > Return::Size && !is_power_of_2<N>::value), \
2267 simd_cast(
const SimdArrayType_<T, N, V, M> &x0,
const From &... xs) \
2269 vc_debug_(
"simd_cast{remaining2}(",
")\n", x0, xs...); \
2270 return simd_cast_impl_larger_input<Return, N, SimdArrayType_<T, N, V, M>, \
2271 From...>(x0, xs...); \
2274 template <typename Return, typename T, std::size_t N, typename V, std::size_t M> \
2275 Vc_INTRINSIC Vc_CONST \
2276 enable_if<(N != M && N >= 2 * Return::Size && is_power_of_2<N>::value), Return> \
2277 simd_cast(
const SimdArrayType_<T, N, V, M> &x) \
2279 vc_debug_(
"simd_cast{single bisectable}(",
")\n", x); \
2280 return simd_cast<Return>(internal_data0(x)); \
2282 template <typename Return, typename T, std::size_t N, typename V, std::size_t M> \
2283 Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size && \
2284 N < 2 * Return::Size && is_power_of_2<N>::value), \
2286 simd_cast(
const SimdArrayType_<T, N, V, M> &x) \
2288 vc_debug_(
"simd_cast{single bisectable2}(",
")\n", x); \
2289 return simd_cast<Return>(internal_data0(x), internal_data1(x)); \
2291 Vc_NOTHING_EXPECTING_SEMICOLON
2293 Vc_SIMDARRAY_CASTS(SimdArray);
2295 #undef Vc_SIMDARRAY_CASTS 2296 template <
class Return,
class T,
int N,
class... Ts,
2297 class = enable_if<!std::is_same<Return, fixed_size_simd<T, N>>::value>>
2303 template <
class Return,
class T,
int N,
class... Ts,
2304 class = enable_if<!std::is_same<Return, fixed_size_simd_mask<T, N>>::value>>
2312 #define Vc_SIMDARRAY_CASTS(SimdArrayType_) \ 2314 template <typename Return, int offset, typename T, std::size_t N, typename V, \ 2316 Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast( \ 2317 const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG0) \ 2319 vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x); \ 2320 return simd_cast<Return>(x); \ 2323 template <typename Return, int offset, typename T, std::size_t N, typename V> \ 2324 Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast( \ 2325 const SimdArrayType_<T, N, V, N> &x Vc_DUMMY_ARG1) \ 2327 vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x); \ 2328 return simd_cast<Return, offset>(internal_data(x)); \ 2331 template <typename Return, int offset, typename T, std::size_t N, typename V, \ 2333 Vc_INTRINSIC Vc_CONST \ 2334 enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() && \ 2335 offset != 0 && Common::left_size<N>() % Return::Size == 0), \ 2337 simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG2) \ 2339 vc_debug_("simd_cast{offset, right}(", ")\n", offset, x); \ 2340 return simd_cast<Return, offset - Common::left_size<N>() / Return::Size>( \ 2341 internal_data1(x)); \ 2345 template <typename Return, int offset, typename T, std::size_t N, typename V, \ 2347 Vc_INTRINSIC Vc_CONST \ 2348 enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() && \ 2349 offset != 0 && Common::left_size<N>() % Return::Size != 0), \ 2351 simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG3) \ 2353 vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x); \ 2354 return simd_cast_with_offset<Return, \ 2355 offset * Return::Size - Common::left_size<N>()>( \ 2356 internal_data1(x)); \ 2359 template <typename Return, int offset, typename T, std::size_t N, typename V, \ 2361 Vc_INTRINSIC Vc_CONST enable_if< \ 2363 offset != 0 && (offset + 1) * Return::Size <= Common::left_size<N>()), \ 2365 simd_cast(const SimdArrayType_<T, N, V, M> &x 
Vc_DUMMY_ARG4) \ 2367 vc_debug_("simd_cast{offset, left}(", ")\n", offset, x); \ 2368 return simd_cast<Return, offset>(internal_data0(x)); \ 2371 template <typename Return, int offset, typename T, std::size_t N, typename V, \ 2373 Vc_INTRINSIC Vc_CONST \ 2374 enable_if<(N != M && (offset * Return::Size < Common::left_size<N>()) && \ 2375 offset != 0 && (offset + 1) * Return::Size > Common::left_size<N>()), \ 2377 simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG5) \ 2379 vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x); \ 2380 using R = typename Return::EntryType; \ 2381 Return r = Return(0); \ 2382 for (std::size_t i = offset * Return::Size; \ 2383 i < std::min(N, (offset + 1) * Return::Size); ++i) { \ 2384 r[i - offset * Return::Size] = static_cast<R>(x[i]); \ 2388 Vc_NOTHING_EXPECTING_SEMICOLON 2389 Vc_SIMDARRAY_CASTS(SimdArray);
2391 #undef Vc_SIMDARRAY_CASTS 2393 template <
typename Return,
typename From>
2394 Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x)
2396 return simd_cast<Return>(x);
2398 template <
typename Return,
typename... Froms>
2399 Vc_INTRINSIC Vc_CONST
2400 enable_if<(are_all_types_equal<Froms...>::value &&
2401 sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
2403 simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
2405 return simd_cast<Return>(xs..., x);
2410 template <
typename Return,
typename From,
typename... Froms>
2411 Vc_INTRINSIC Vc_CONST enable_if<
2412 (are_all_types_equal<From, Froms...>::value &&
2413 (1 +
sizeof...(Froms)) * From::Size >= Return::Size &&
sizeof...(Froms) != 0),
2415 simd_cast_drop_arguments(Froms... xs, From x, From)
2417 return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
2419 template <
typename Return,
typename From>
2420 Vc_INTRINSIC Vc_CONST
2421 enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
2422 simd_cast_drop_arguments(From x, From)
2424 return simd_cast_drop_arguments<Return>(x);
2428 template <
typename Return, std::
size_t offset,
typename From>
2429 Vc_INTRINSIC Vc_CONST
2430 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0),
2431 Return> simd_cast_with_offset(
const From &x)
2433 return simd_cast<Return, offset / Return::Size>(x);
2435 template <
typename Return, std::
size_t offset,
typename From>
2436 Vc_INTRINSIC Vc_CONST
2437 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
2439 !Traits::isAtomicSimdArray<Return>::value) ||
2441 !Traits::isAtomicSimdMaskArray<Return>::value))),
2443 simd_cast_with_offset(
const From &x)
2445 using R0 =
typename Return::storage_type0;
2446 using R1 =
typename Return::storage_type1;
2447 return {simd_cast_with_offset<R0, offset>(x),
2448 simd_cast_with_offset<R1, offset + R0::Size>(x)};
2450 template <
typename Return, std::
size_t offset,
typename From>
2451 Vc_INTRINSIC Vc_CONST
2452 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
2454 Traits::isAtomicSimdArray<Return>::value) ||
2456 Traits::isAtomicSimdMaskArray<Return>::value))),
2458 simd_cast_with_offset(
const From &x)
2460 return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
2462 template <
typename Return, std::size_t offset,
typename From,
typename... Froms>
2463 Vc_INTRINSIC Vc_CONST
2464 enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
2465 simd_cast_with_offset(
const From &x,
const Froms &... xs)
2467 return simd_cast<Return>(x, xs...);
2471 template <
typename Return,
typename T,
typename... From>
2472 Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(
const From &... xs,
const T &)
2474 return simd_cast<Return>(xs...);
2483 template <std::
size_t I,
typename T0>
2484 Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(
const T0 &a0,
const T0 &)
2488 template <std::
size_t I,
typename T0>
2489 Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(
const T0 &,
const T0 &b0)
2496 template <std::size_t I,
typename T0,
typename... Ts>
2497 Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(
const T0 &a0,
2505 template <std::size_t I,
typename T0,
typename... Ts>
2506 Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(
const T0 &,
2514 template <std::size_t I,
typename T0,
typename... Ts>
2515 Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(
const T0 &,
2520 return extract_interleaved<I - 2, Ts...>(a..., b...);
2523 template <
typename Return,
typename... Ts, std::size_t... Indexes>
2524 Vc_INTRINSIC Vc_CONST Return
2525 simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>,
const Ts &... a,
2528 return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
2532 template <
typename Return,
typename... Ts>
2533 Vc_INTRINSIC Vc_CONST Return
2534 simd_cast_interleaved_argument_order(
const Ts &... a,
const Ts &... b)
2536 using seq = make_index_sequence<
sizeof...(Ts)*2>;
2537 return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...);
2541 #define Vc_CONDITIONAL_ASSIGN(name_, op_) \ 2542 template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M, \ 2544 Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign( \ 2545 SimdArray<T, N, V, VN> &lhs, M &&mask, U &&rhs) \ 2547 lhs(mask) op_ rhs; \ 2549 Vc_NOTHING_EXPECTING_SEMICOLON 2550 Vc_CONDITIONAL_ASSIGN( Assign, =);
2551 Vc_CONDITIONAL_ASSIGN( PlusAssign, +=);
2552 Vc_CONDITIONAL_ASSIGN( MinusAssign, -=);
2553 Vc_CONDITIONAL_ASSIGN( MultiplyAssign, *=);
2554 Vc_CONDITIONAL_ASSIGN( DivideAssign, /=);
2555 Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
2556 Vc_CONDITIONAL_ASSIGN( XorAssign, ^=);
2557 Vc_CONDITIONAL_ASSIGN( AndAssign, &=);
2558 Vc_CONDITIONAL_ASSIGN( OrAssign, |=);
2559 Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
2560 Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
2561 #undef Vc_CONDITIONAL_ASSIGN 2563 #define Vc_CONDITIONAL_ASSIGN(name_, expr_) \ 2564 template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M> \ 2565 Vc_INTRINSIC enable_if<O == Operator::name_, SimdArray<T, N, V, VN>> \ 2566 conditional_assign(SimdArray<T, N, V, VN> &lhs, M &&mask) \ 2570 Vc_NOTHING_EXPECTING_SEMICOLON 2571 Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
2572 Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
2573 Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
2574 Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
2575 #undef Vc_CONDITIONAL_ASSIGN 2579 template <
typename T,
size_t N,
typename V>
2580 inline void transpose_impl(
2585 V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
2586 &internal_data(*r[2]), &internal_data(*r[3])};
2587 transpose_impl(TransposeTag<4, 4>(), &r2[0],
2588 TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
2589 internal_data(std::get<1>(proxy.in)),
2590 internal_data(std::get<2>(proxy.in)),
2591 internal_data(std::get<3>(proxy.in))});
2594 template <
typename T,
typename V>
2595 inline void transpose_impl(
2602 internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
2603 internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
2604 internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
2605 internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
2606 internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
2607 internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
2608 internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
2609 internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
2612 template <
typename T,
typename V>
2613 inline void transpose_impl(
2618 V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
2619 &internal_data(*r[2]), &internal_data(*r[3])};
2620 transpose_impl(TransposeTag<4, 4>(), &r2[0],
2621 TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
2622 internal_data(std::get<1>(proxy.in)),
2623 internal_data(std::get<2>(proxy.in)),
2624 internal_data(std::get<3>(proxy.in))});
2627 template <
typename T,
size_t N,
typename V>
2628 inline void transpose_impl(
2636 transpose_impl(TransposeTag<2, 4>(), &r0[0],
2637 TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
2638 internal_data0(std::get<1>(proxy.in)),
2639 internal_data0(std::get<2>(proxy.in)),
2640 internal_data0(std::get<3>(proxy.in))});
2641 transpose_impl(TransposeTag<2, 4>(), &r1[0],
2642 TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
2643 internal_data1(std::get<1>(proxy.in)),
2644 internal_data1(std::get<2>(proxy.in)),
2645 internal_data1(std::get<3>(proxy.in))});
2686 template <
class T,
size_t N,
class V,
size_t VSizeof>
2687 struct InterleaveImpl<SimdArray<T, N, V, N>, N, VSizeof> {
2688 template <
class I,
class... VV>
2689 static Vc_INTRINSIC
void interleave(T *
const data,
const I &i,
const VV &... vv)
2693 template <
class I,
class... VV>
2694 static Vc_INTRINSIC
void deinterleave(T
const *
const data,
const I &i, VV &... vv)
2735 template <
typename T,
size_t N,
typename V,
size_t VN>
2736 struct numeric_limits<
Vc::SimdArray<T, N, V, VN>> :
public numeric_limits<T> {
2743 static Vc_ALWAYS_INLINE Vc_CONST R lowest() noexcept
2745 return numeric_limits<T>::lowest();
2747 static Vc_ALWAYS_INLINE Vc_CONST R epsilon() noexcept
2749 return numeric_limits<T>::epsilon();
2751 static Vc_ALWAYS_INLINE Vc_CONST R round_error() noexcept
2753 return numeric_limits<T>::round_error();
2755 static Vc_ALWAYS_INLINE Vc_CONST R infinity() noexcept
2757 return numeric_limits<T>::infinity();
2759 static Vc_ALWAYS_INLINE Vc_CONST R quiet_NaN() noexcept
2761 return numeric_limits<T>::quiet_NaN();
2763 static Vc_ALWAYS_INLINE Vc_CONST R signaling_NaN() noexcept
2765 return numeric_limits<T>::signaling_NaN();
2767 static Vc_ALWAYS_INLINE Vc_CONST R denorm_min() noexcept
2769 return numeric_limits<T>::denorm_min();
2775 #endif // VC_COMMON_SIMDARRAY_H_ value_type operator[](size_t index) const noexcept
This operator can be used to read scalar entries of the vector.
The main vector class for expressing data parallelism.
constexpr VectorSpecialInitializerIndexesFromZero IndexesFromZero
The special object Vc::IndexesFromZero can be used to construct Vector objects initialized to values ...
fixed_size_simd< T, N > max(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::max function component-wise and concurrently.
Vc::Vector< T > min(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
Vector apply(F &&f) const
Call f on every entry of the vector and return the results as a new vector.
std::ostream & operator<<(std::ostream &out, const Vc::Vector< T, Abi > &v)
Prints the contents of a vector into a stream object.
UnalignedTag DefaultLoadTag
The default load tag type uses unaligned (non-streaming) loads.
static fixed_size_simd< T, N > Zero()
Returns a vector with the entries initialized to zero.
result_vector_type< L, R > operator-(L &&lhs, R &&rhs)
Applies - component-wise and concurrently.
fixed_size_simd< T, N > copysign(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::copysign function component-wise and concurrently.
Vc::Vector< T > max(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
static fixed_size_simd< T, N > IndexesFromZero()
Returns a vector with the entries initialized to 0, 1, 2, 3, 4, 5, ...
Identifies any possible SimdArray<T, N> type (independent of const/volatile or reference) ...
fixed_size_simd< T, N > atan2(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::atan2 function component-wise and concurrently.
fixed_size_simd< T, N > min(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::min function component-wise and concurrently.
Identifies any possible SimdMaskArray<T, N> type (independent of const/volatile or reference) ...
Data-parallel arithmetic type with user-defined number of elements.
The value member will either be the number of SIMD vector entries or 0 if T is not a SIMD type...
fixed_size_simd< T, N > rotated(int amount) const
Rotate vector entries to the left by amount.
Vector reversed() const
Returns a vector with all components reversed.
fixed_size_simd< T, N > reversed() const
Returns a vector with all components reversed.
Data-parallel mask type with user-defined number of boolean elements.
Vector sorted() const
Return a sorted copy of the vector.
Vector rotated(int amount) const
Rotate vector entries to the left by amount.
Vector shifted(int amount) const
Shift vector entries to the left by amount; shifting in zeros.
void assign(SimdizeDetail::Adapter< S, T, N > &a, size_t i, const S &x)
Assigns one scalar object x to a SIMD slot at offset i in the simdized object a.
fixed_size_simd< T, N > sorted() const
Return a sorted copy of the vector.
fixed_size_simd_mask< T, N > isnegative(const SimdArray< T, N, V, M > &x)
Applies the Vc::isnegative function component-wise and concurrently.
Identifies any SIMD vector type (independent of implementation or whether it's SimdArray<T, N>).
Common::WriteMaskedVector< SimdArray, mask_type > operator()(const mask_type &mask)
Writemask the vector before an assignment.
static fixed_size_simd< T, N > Random()
Returns a vector with pseudo-random entries.
Vector partialSum() const
Returns a vector containing the sum of all entries with smaller index.
result_vector_type< L, R > operator+(L &&lhs, R &&rhs)
Applies + component-wise and concurrently.
static fixed_size_simd< T, N > One()
Returns a vector with the entries initialized to one.
fixed_size_simd< T, N > apply(F &&f) const
Call f on every entry of the vector and return the results as a new vector.
fixed_size_simd< T, N > apply(F &&f, const mask_type &k) const
As above, but skip the entries where mask is not set.
static constexpr std::size_t size()
Returns N, the number of scalar components in an object of this type.
value_type EntryType
The type of the elements (i.e. T)
void deinterleave(V *a, V *b, const M *memory, A align)
constexpr AlignedTag Aligned
Use this object for a flags parameter to request aligned loads and stores.
void gather(const MT *mem, const IT &indexes)
Gather function.
SimdArray(value_type a)
Broadcast Constructor.
The main SIMD mask class.
void load(const EntryType *mem)
Load the vector entries from mem, overwriting the previous values.
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/false.
Adapter< S, T, N > shifted(const Adapter< S, T, N > &a, int shift)
Returns a new vectorized object where each entry is shifted by shift.
T value_type
The type of the elements (i.e. T)
SimdArray< T, N > frexp(const SimdArray< T, N > &x, SimdArray< int, N > *e)
Applies the std::frexp function component-wise and concurrently.
Vector Classes Namespace.
constexpr VectorSpecialInitializerOne One
The special object Vc::One can be used to construct Vector and Mask objects initialized to one/true...
void scatter(MT *mem, IT &&indexes) const
Scatter function.
std::pair< V, V > interleave(const V &a, const V &b)
Interleaves the entries from a and b into two vectors of the same type.
constexpr std::size_t MemoryAlignment
Specifies the most conservative memory alignment necessary for aligned loads and stores of Vector types.
reference operator[](size_t i) noexcept
This operator can be used to modify scalar entries of the vector.
fixed_size_simd< T, N > shifted(int amount) const
Shift vector entries to the left by amount; shifting in zeros.
void sincos(const SimdArray< T, N > &x, SimdArray< T, N > *sin, SimdArray< T, N > *cos)
Determines sine and cosine concurrently and component-wise on x.
SimdArray< T, N > fma(const SimdArray< T, N > &a, const SimdArray< T, N > &b, const SimdArray< T, N > &c)
Applies the std::fma function component-wise and concurrently.
SimdArray< T, N > ldexp(const SimdArray< T, N > &x, const SimdArray< int, N > &e)
Applies the std::ldexp function component-wise and concurrently.
fixed_size_simd< T, N > exponent(const SimdArray< T, N, V, M > &x)
Applies the Vc::exponent function component-wise and concurrently.
static fixed_size_simd< T, N > generate(const G &gen)
Generate a vector object from return values of gen (static variant of fill).
fixed_size_simd< T, N > operator+() const
Returns a copy of itself.
constexpr UnalignedTag Unaligned
Use this object for a flags parameter to request unaligned loads and stores.