Vc  1.4.0
SIMD Vector Classes for C++
simdarray.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 #ifndef VC_COMMON_SIMDARRAY_H_
29 #define VC_COMMON_SIMDARRAY_H_
30 
31 //#define Vc_DEBUG_SIMD_CAST 1
32 //#define Vc_DEBUG_SORTED 1
33 //#include "../IO"
34 
35 #include <array>
36 
37 #include "writemaskedvector.h"
38 #include "simdarrayhelper.h"
39 #include "simdmaskarray.h"
40 #include "utility.h"
41 #include "interleave.h"
42 #include "indexsequence.h"
43 #include "transpose.h"
44 #include "macros.h"
45 
46 namespace Vc_VERSIONED_NAMESPACE
47 {
48 // select_best_vector_type {{{
49 namespace Common
50 {
53 
// Chooses, from an ordered candidate list, the first vector type whose width
// does not exceed N elements.
template <std::size_t N, class... Candidates> struct select_best_vector_type_impl;
// Terminal case: a single candidate remains; it is selected unconditionally.
template <std::size_t N, class Fallback>
struct select_best_vector_type_impl<N, Fallback> {
    using type = Fallback;
};
// Recursive case: keep the front candidate when it fits (N >= Candidate::Size),
// otherwise continue the search with the remaining candidates.
template <std::size_t N, class Candidate, class... Rest>
struct select_best_vector_type_impl<N, Candidate, Rest...> {
    using type = typename std::conditional<
        (N >= Candidate::Size), Candidate,
        typename select_best_vector_type_impl<N, Rest...>::type>::type;
};
69 template <class T, std::size_t N>
70 struct select_best_vector_type : select_best_vector_type_impl<N,
71 #ifdef Vc_IMPL_AVX2
72  Vc::AVX2::Vector<T>,
73 #elif defined Vc_IMPL_AVX
74  Vc::AVX::Vector<T>,
75 #endif
76 #ifdef Vc_IMPL_SSE
77  Vc::SSE::Vector<T>,
78 #endif
79  Vc::Scalar::Vector<T>> {
80 };
82 } // namespace Common
83 // }}}
84 // internal namespace (product & sum helper) {{{1
85 namespace internal
86 {
87 template <typename T> T Vc_INTRINSIC Vc_PURE product_helper_(const T &l, const T &r) { return l * r; }
88 template <typename T> T Vc_INTRINSIC Vc_PURE sum_helper_(const T &l, const T &r) { return l + r; }
89 } // namespace internal
90 
91 // min & max declarations {{{1
92 template <typename T, std::size_t N, typename V, std::size_t M>
94  const SimdArray<T, N, V, M> &y);
95 template <typename T, std::size_t N, typename V, std::size_t M>
97  const SimdArray<T, N, V, M> &y);
98 
99 // SimdArray class {{{1
102 
103 // atomic SimdArray {{{1
104 #define Vc_CURRENT_CLASS_NAME SimdArray
105 
114 template <typename T, std::size_t N, typename VectorType_>
115 class SimdArray<T, N, VectorType_, N>
116 {
117  static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
118  std::is_same<T, int32_t>::value ||
119  std::is_same<T, uint32_t>::value ||
120  std::is_same<T, int16_t>::value ||
121  std::is_same<T, uint16_t>::value,
122  "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, "
123  "int16_t, uint16_t }");
124  static_assert(
125  std::is_same<VectorType_,
126  typename Common::select_best_vector_type<T, N>::type>::value &&
127  VectorType_::size() == N,
128  "ERROR: leave the third and fourth template parameters with their defaults. They "
129  "are implementation details.");
130 
131 public:
132  static constexpr bool is_atomic = true;
133  using VectorType = VectorType_;
134  using vector_type = VectorType;
135  using storage_type = vector_type;
136  using vectorentry_type = typename vector_type::VectorEntryType;
137  using value_type = T;
140  static constexpr std::size_t size() { return N; }
141  using Mask = mask_type;
142  using MaskType = Mask;
143  using MaskArgument = const MaskType &;
144  using VectorEntryType = vectorentry_type;
146  using IndexType = index_type;
147  using AsArg = const SimdArray &;
148  using reference = Detail::ElementReference<SimdArray>;
149  static constexpr std::size_t Size = size();
150  static constexpr std::size_t MemoryAlignment = storage_type::MemoryAlignment;
151 
152  // zero init
153  Vc_INTRINSIC SimdArray() = default;
154 
155  // default copy ctor/operator
156  Vc_INTRINSIC SimdArray(const SimdArray &) = default;
157  Vc_INTRINSIC SimdArray(SimdArray &&) = default;
158  Vc_INTRINSIC SimdArray &operator=(const SimdArray &) = default;
159 
160  // broadcast
161  Vc_INTRINSIC SimdArray(const value_type &a) : data(a) {}
162  Vc_INTRINSIC SimdArray(value_type &a) : data(a) {}
163  Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {}
164  template <
165  typename U,
166  typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
167  Vc_INTRINSIC SimdArray(U a)
168  : SimdArray(static_cast<value_type>(a))
169  {
170  }
171 
172  // implicit casts
173  template <class U, class V, class = enable_if<N == V::Size>>
174  Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
175  : data(simd_cast<vector_type>(internal_data(x)))
176  {
177  }
178  template <class U, class V, class = enable_if<(N > V::Size && N <= 2 * V::Size)>,
179  class = U>
180  Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
181  : data(simd_cast<vector_type>(internal_data(internal_data0(x)),
182  internal_data(internal_data1(x))))
183  {
184  }
185  template <class U, class V, class = enable_if<(N > 2 * V::Size && N <= 4 * V::Size)>,
186  class = U, class = U>
187  Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
188  : data(simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))),
189  internal_data(internal_data1(internal_data0(x))),
190  internal_data(internal_data0(internal_data1(x))),
191  internal_data(internal_data1(internal_data1(x)))))
192  {
193  }
194 
195  template <typename V, std::size_t Pieces, std::size_t Index>
196  Vc_INTRINSIC SimdArray(Common::Segment<V, Pieces, Index> &&x)
197  : data(simd_cast<vector_type, Index>(x.data))
198  {
199  }
200 
201  Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
202  : data(init.begin(), Vc::Unaligned)
203  {
204  Vc_ASSERT(init.size() == size());
205  }
206 
207  // implicit conversion from underlying vector_type
208  template <
209  typename V,
210  typename = enable_if<Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value>>
211  Vc_INTRINSIC SimdArray(const V &x)
212  : data(simd_cast<vector_type>(x))
213  {
214  }
215 
216  // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and
217  // T implicitly convertible to U
218  template <typename U, typename A,
219  typename =
220  enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
221  !std::is_same<A, simd_abi::fixed_size<N>>::value>>
222  Vc_INTRINSIC operator Vector<U, A>() const
223  {
224  return simd_cast<Vector<U, A>>(data);
225  }
226  operator fixed_size_simd<T, N> &()
227  {
228  return static_cast<fixed_size_simd<T, N> &>(*this);
229  }
230  operator const fixed_size_simd<T, N> &() const
231  {
232  return static_cast<const fixed_size_simd<T, N> &>(*this);
233  }
234 
235 #include "gatherinterface.h"
236 #include "scatterinterface.h"
237 
238  // forward all remaining ctors
239  template <typename... Args,
240  typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
241  !Traits::is_gather_signature<Args...>::value &&
242  !Traits::is_initializer_list<Args...>::value>>
243  explicit Vc_INTRINSIC SimdArray(Args &&... args)
244  : data(std::forward<Args>(args)...)
245  {
246  }
247 
248  template <std::size_t Offset>
249  explicit Vc_INTRINSIC SimdArray(
250  Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
251  : data(Vc::IndexesFromZero)
252  {
253  data += value_type(Offset);
254  }
255 
256  Vc_INTRINSIC void setZero() { data.setZero(); }
257  Vc_INTRINSIC void setZero(mask_type k) { data.setZero(internal_data(k)); }
258  Vc_INTRINSIC void setZeroInverted() { data.setZeroInverted(); }
259  Vc_INTRINSIC void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); }
260 
261  Vc_INTRINSIC void setQnan() { data.setQnan(); }
262  Vc_INTRINSIC void setQnan(mask_type m) { data.setQnan(internal_data(m)); }
263 
264  // internal: execute specified Operation
265  template <typename Op, typename... Args>
266  static Vc_INTRINSIC fixed_size_simd<T, N> fromOperation(Op op, Args &&... args)
267  {
269  Common::unpackArgumentsAuto(op, r.data, std::forward<Args>(args)...);
270  return r;
271  }
272 
273  template <typename Op, typename... Args>
274  static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
275  {
276  Common::unpackArgumentsAuto(op, nullptr, std::forward<Args>(args)...);
277  }
278 
279  static Vc_INTRINSIC fixed_size_simd<T, N> Zero()
280  {
281  return SimdArray(Vc::Zero);
282  }
283  static Vc_INTRINSIC fixed_size_simd<T, N> One()
284  {
285  return SimdArray(Vc::One);
286  }
287  static Vc_INTRINSIC fixed_size_simd<T, N> IndexesFromZero()
288  {
289  return SimdArray(Vc::IndexesFromZero);
290  }
291  static Vc_INTRINSIC fixed_size_simd<T, N> Random()
292  {
293  return fromOperation(Common::Operations::random());
294  }
295 
296  template <typename... Args> Vc_INTRINSIC void load(Args &&... args)
297  {
298  data.load(std::forward<Args>(args)...);
299  }
300 
301  template <typename... Args> Vc_INTRINSIC void store(Args &&... args) const
302  {
303  data.store(std::forward<Args>(args)...);
304  }
305 
306  Vc_INTRINSIC mask_type operator!() const
307  {
308  return {private_init, !data};
309  }
310 
311  Vc_INTRINSIC fixed_size_simd<T, N> operator-() const
312  {
313  return {private_init, -data};
314  }
315 
317  Vc_INTRINSIC fixed_size_simd<T, N> operator+() const { return *this; }
318 
319  Vc_INTRINSIC fixed_size_simd<T, N> operator~() const
320  {
321  return {private_init, ~data};
322  }
323 
324  template <typename U,
325  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
326  Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator<<(U x) const
327  {
328  return {private_init, data << x};
329  }
330  template <typename U,
331  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
332  Vc_INTRINSIC fixed_size_simd<T, N> &operator<<=(U x)
333  {
334  data <<= x;
335  return *this;
336  }
337  template <typename U,
338  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
339  Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator>>(U x) const
340  {
341  return {private_init, data >> x};
342  }
343  template <typename U,
344  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
345  Vc_INTRINSIC fixed_size_simd<T, N> &operator>>=(U x)
346  {
347  data >>= x;
348  return *this;
349  }
350 
351 #define Vc_BINARY_OPERATOR_(op) \
352  Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs) \
353  { \
354  data op## = rhs.data; \
355  return *this; \
356  }
357  Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
358  Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
359  Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
360 #undef Vc_BINARY_OPERATOR_
361 
363  Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
364  {
365  return {private_init, isnegative(data)};
366  }
367 
368 private:
369  friend reference;
370  Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
371  {
372  return o.data[i];
373  }
374  template <typename U>
375  Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
376  noexcept(std::declval<value_type &>() = v))
377  {
378  o.data[i] = v;
379  }
380 
381 public:
388  Vc_INTRINSIC reference operator[](size_t i) noexcept
389  {
390  static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
391  return {*this, int(i)};
392  }
393  Vc_INTRINSIC value_type operator[](size_t i) const noexcept
394  {
395  return get(*this, int(i));
396  }
397 
398  Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(const mask_type &k)
399  {
400  return {*this, k};
401  }
402 
403  Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k)
404  {
405  data.assign(v.data, internal_data(k));
406  }
407 
408  // reductions ////////////////////////////////////////////////////////
409 #define Vc_REDUCTION_FUNCTION_(name_) \
410  Vc_INTRINSIC Vc_PURE value_type name_() const { return data.name_(); } \
411  Vc_INTRINSIC Vc_PURE value_type name_(mask_type mask) const \
412  { \
413  return data.name_(internal_data(mask)); \
414  } \
415  Vc_NOTHING_EXPECTING_SEMICOLON
416  Vc_REDUCTION_FUNCTION_(min);
417  Vc_REDUCTION_FUNCTION_(max);
418  Vc_REDUCTION_FUNCTION_(product);
419  Vc_REDUCTION_FUNCTION_(sum);
420 #undef Vc_REDUCTION_FUNCTION_
421  Vc_INTRINSIC Vc_PURE fixed_size_simd<T, N> partialSum() const
422  {
423  return {private_init, data.partialSum()};
424  }
425 
426  template <typename F> Vc_INTRINSIC fixed_size_simd<T, N> apply(F &&f) const
427  {
428  return {private_init, data.apply(std::forward<F>(f))};
429  }
430  template <typename F> Vc_INTRINSIC fixed_size_simd<T, N> apply(F &&f, const mask_type &k) const
431  {
432  return {private_init, data.apply(std::forward<F>(f), k)};
433  }
434 
435  Vc_INTRINSIC fixed_size_simd<T, N> shifted(int amount) const
436  {
437  return {private_init, data.shifted(amount)};
438  }
439 
440  template <std::size_t NN>
441  Vc_INTRINSIC fixed_size_simd<T, N> shifted(int amount, const SimdArray<value_type, NN> &shiftIn)
442  const
443  {
444  return {private_init, data.shifted(amount, simd_cast<VectorType>(shiftIn))};
445  }
446 
447  Vc_INTRINSIC fixed_size_simd<T, N> rotated(int amount) const
448  {
449  return {private_init, data.rotated(amount)};
450  }
451 
453  Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC fixed_size_simd<T, N> exponent() const
454  {
455  return {private_init, exponent(data)};
456  }
457 
458  Vc_INTRINSIC fixed_size_simd<T, N> interleaveLow(SimdArray x) const
459  {
460  return {private_init, data.interleaveLow(x.data)};
461  }
462  Vc_INTRINSIC fixed_size_simd<T, N> interleaveHigh(SimdArray x) const
463  {
464  return {private_init, data.interleaveHigh(x.data)};
465  }
466 
467  Vc_INTRINSIC fixed_size_simd<T, N> reversed() const
468  {
469  return {private_init, data.reversed()};
470  }
471 
472  Vc_INTRINSIC fixed_size_simd<T, N> sorted() const
473  {
474  return {private_init, data.sorted()};
475  }
476 
477  template <typename G> static Vc_INTRINSIC fixed_size_simd<T, N> generate(const G &gen)
478  {
479  return {private_init, VectorType::generate(gen)};
480  }
481 
482  Vc_DEPRECATED("use copysign(x, y) instead")
483  Vc_INTRINSIC fixed_size_simd<T, N> copySign(const SimdArray &x) const
484  {
485  return {private_init, Vc::copysign(data, x.data)};
486  }
487 
488  friend VectorType &internal_data<>(SimdArray &x);
489  friend const VectorType &internal_data<>(const SimdArray &x);
490 
492  Vc_INTRINSIC SimdArray(private_init_t, VectorType &&x) : data(std::move(x)) {}
493 
494  Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type));
495 
496 private:
497  // The alignas attribute attached to the class declaration above is ignored by ICC
498  // 17.0.0 (at least). So just move the alignas attribute down here where it works for
499  // all compilers.
500  alignas(static_cast<std::size_t>(
501  Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(VectorType_) /
502  VectorType_::size()>::value)) storage_type data;
503 };
504 template <typename T, std::size_t N, typename VectorType> constexpr std::size_t SimdArray<T, N, VectorType, N>::Size;
505 template <typename T, std::size_t N, typename VectorType>
507 template <typename T, std::size_t N, typename VectorType>
508 #ifndef Vc_MSVC
509 Vc_INTRINSIC
510 #endif
511 VectorType &internal_data(SimdArray<T, N, VectorType, N> &x)
512 {
513  return x.data;
514 }
515 template <typename T, std::size_t N, typename VectorType>
516 #ifndef Vc_MSVC
517 Vc_INTRINSIC
518 #endif
519 const VectorType &internal_data(const SimdArray<T, N, VectorType, N> &x)
520 {
521  return x.data;
522 }
523 
524 // unpackIfSegment {{{2
// Pass-through overload: anything that is not a Common::Segment is forwarded unchanged.
template <typename T> T unpackIfSegment(T &&value) { return std::forward<T>(value); }
526 template <typename T, size_t Pieces, size_t Index>
527 auto unpackIfSegment(Common::Segment<T, Pieces, Index> &&x) -> decltype(x.asSimdArray())
528 {
529  return x.asSimdArray();
530 }
531 
532 // gatherImplementation {{{2
533 template <typename T, std::size_t N, typename VectorType>
534 template <typename MT, typename IT>
536  const IT &indexes)
537 {
538  data.gather(mem, unpackIfSegment(indexes));
539 }
540 template <typename T, std::size_t N, typename VectorType>
541 template <typename MT, typename IT>
543  const IT &indexes,
544  MaskArgument mask)
545 {
546  data.gather(mem, unpackIfSegment(indexes), mask);
547 }
548 
549 // scatterImplementation {{{2
550 template <typename T, std::size_t N, typename VectorType>
551 template <typename MT, typename IT>
553  IT &&indexes) const
554 {
555  data.scatter(mem, unpackIfSegment(std::forward<IT>(indexes)));
556 }
557 template <typename T, std::size_t N, typename VectorType>
558 template <typename MT, typename IT>
560  IT &&indexes,
561  MaskArgument mask) const
562 {
563  data.scatter(mem, unpackIfSegment(std::forward<IT>(indexes)), mask);
564 }
565 
566 // generic SimdArray {{{1
599 template <typename T, size_t N, typename V, size_t Wt> class SimdArray
600 {
601  static_assert(std::is_same<T, double>::value ||
602  std::is_same<T, float>::value ||
603  std::is_same<T, int32_t>::value ||
604  std::is_same<T, uint32_t>::value ||
605  std::is_same<T, int16_t>::value ||
606  std::is_same<T, uint16_t>::value, "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }");
607  static_assert(
608  std::is_same<V, typename Common::select_best_vector_type<T, N>::type>::value &&
609  V::size() == Wt,
610  "ERROR: leave the third and fourth template parameters with their defaults. They "
611  "are implementation details.");
612  static_assert(
613  // either the EntryType and VectorEntryType of the main V are equal
614  std::is_same<typename V::EntryType, typename V::VectorEntryType>::value ||
615  // or N is a multiple of V::size()
616  (N % V::size() == 0),
617  "SimdArray<(un)signed short, N> on MIC only works correctly for N = k * "
618  "MIC::(u)short_v::size(), i.e. k * 16.");
619 
620  using my_traits = SimdArrayTraits<T, N>;
621  static constexpr std::size_t N0 = my_traits::N0;
622  static constexpr std::size_t N1 = my_traits::N1;
623  using Split = Common::Split<N0>;
624  template <typename U, std::size_t K> using CArray = U[K];
625 
626 public:
627  static constexpr bool is_atomic = false;
628  using storage_type0 = typename my_traits::storage_type0;
629  using storage_type1 = typename my_traits::storage_type1;
630  static_assert(storage_type0::size() == N0, "");
631 
635  using vector_type = V;
636  using vectorentry_type = typename storage_type0::vectorentry_type;
637  typedef vectorentry_type alias_type Vc_MAY_ALIAS;
638 
640  using value_type = T;
641 
644 
647 
658  static constexpr std::size_t size() { return N; }
659 
661  using Mask = mask_type;
663  using MaskType = Mask;
664  using MaskArgument = const MaskType &;
665  using VectorEntryType = vectorentry_type;
670  using AsArg = const SimdArray &;
671 
672  using reference = Detail::ElementReference<SimdArray>;
673 
675  static constexpr std::size_t MemoryAlignment =
679 
682 
684  static Vc_INTRINSIC fixed_size_simd<T, N> Zero()
685  {
686  return SimdArray(Vc::Zero);
687  }
688 
690  static Vc_INTRINSIC fixed_size_simd<T, N> One()
691  {
692  return SimdArray(Vc::One);
693  }
694 
697  {
698  return SimdArray(Vc::IndexesFromZero);
699  }
700 
702  static Vc_INTRINSIC fixed_size_simd<T, N> Random()
703  {
704  return fromOperation(Common::Operations::random());
705  }
706 
708  template <typename G> static Vc_INTRINSIC fixed_size_simd<T, N> generate(const G &gen) // {{{2
709  {
710  auto tmp = storage_type0::generate(gen); // GCC bug: the order of evaluation in
711  // an initializer list is well-defined
712  // (front to back), but GCC 4.8 doesn't
713  // implement this correctly. Therefore
714  // we enforce correct order.
715  return {std::move(tmp),
716  storage_type1::generate([&](std::size_t i) { return gen(i + N0); })};
717  }
719 
722 
724  SimdArray() = default;
726 
729 
731  Vc_INTRINSIC SimdArray(value_type a) : data0(a), data1(a) {}
732  template <
733  typename U,
734  typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
735  SimdArray(U a)
736  : SimdArray(static_cast<value_type>(a))
737  {
738  }
740 
741  // default copy ctor/operator
742  SimdArray(const SimdArray &) = default;
743  SimdArray(SimdArray &&) = default;
744  SimdArray &operator=(const SimdArray &) = default;
745 
746  // load ctor
747  template <typename U, typename Flags = DefaultLoadTag,
748  typename = enable_if<std::is_arithmetic<U>::value &&
749  Traits::is_load_store_flag<Flags>::value>>
750  explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = Flags())
751  : data0(mem, f), data1(mem + storage_type0::size(), f)
752  {
753  }
754 
755 // MSVC does overload resolution differently and takes the const U *mem overload (I hope)
756 #ifndef Vc_MSVC
757 
763  template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
764  typename = enable_if<std::is_arithmetic<U>::value &&
765  Traits::is_load_store_flag<Flags>::value>>
766  explicit Vc_INTRINSIC SimdArray(CArray<U, Extent> &mem, Flags f = Flags())
767  : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
768  {
769  }
773  template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
774  typename = enable_if<std::is_arithmetic<U>::value &&
775  Traits::is_load_store_flag<Flags>::value>>
776  explicit Vc_INTRINSIC SimdArray(const CArray<U, Extent> &mem, Flags f = Flags())
777  : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
778  {
779  }
780 #endif
781 
782  // initializer list
783  Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
784  : data0(init.begin(), Vc::Unaligned)
785  , data1(init.begin() + storage_type0::size(), Vc::Unaligned)
786  {
787  Vc_ASSERT(init.size() == size());
788  }
789 
790 #include "gatherinterface.h"
791 #include "scatterinterface.h"
792 
793  // forward all remaining ctors
794  template <typename... Args,
795  typename = enable_if<!Traits::is_cast_arguments<Args...>::value &&
796  !Traits::is_initializer_list<Args...>::value &&
797  !Traits::is_gather_signature<Args...>::value &&
798  !Traits::is_load_arguments<Args...>::value>>
799  explicit Vc_INTRINSIC SimdArray(Args &&... args)
800  : data0(Split::lo(args)...) // no forward here - it could move and thus
801  // break the next line
802  , data1(Split::hi(std::forward<Args>(args))...)
803  {
804  }
805 
806  // explicit casts
807  template <class W, class = enable_if<
810  !(std::is_convertible<Traits::entry_type_of<W>, T>::value &&
812  Vc_INTRINSIC explicit SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
813  {
814  }
815 
816  // implicit casts
817  template <class W, class = enable_if<
819  Traits::simd_vector_size<W>::value == N &&
820  std::is_convertible<Traits::entry_type_of<W>, T>::value)>,
821  class = W>
822  Vc_INTRINSIC SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
823  {
824  }
825 
826  // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and
827  // T implicitly convertible to U
828  template <typename U, typename A,
829  typename =
830  enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
831  !std::is_same<A, simd_abi::fixed_size<N>>::value>>
832  operator Vector<U, A>() const
833  {
834  auto r = simd_cast<Vector<U, A>>(data0, data1);
835  return r;
836  }
837  Vc_INTRINSIC operator fixed_size_simd<T, N> &()
838  {
839  return static_cast<fixed_size_simd<T, N> &>(*this);
840  }
841  Vc_INTRINSIC operator const fixed_size_simd<T, N> &() const
842  {
843  return static_cast<const fixed_size_simd<T, N> &>(*this);
844  }
845 
847 
848  Vc_INTRINSIC void setZero()
849  {
850  data0.setZero();
851  data1.setZero();
852  }
853  Vc_INTRINSIC void setZero(const mask_type &k)
854  {
855  data0.setZero(Split::lo(k));
856  data1.setZero(Split::hi(k));
857  }
858  Vc_INTRINSIC void setZeroInverted()
859  {
860  data0.setZeroInverted();
861  data1.setZeroInverted();
862  }
863  Vc_INTRINSIC void setZeroInverted(const mask_type &k)
864  {
865  data0.setZeroInverted(Split::lo(k));
866  data1.setZeroInverted(Split::hi(k));
867  }
868 
869 
870  Vc_INTRINSIC void setQnan() {
871  data0.setQnan();
872  data1.setQnan();
873  }
874  Vc_INTRINSIC void setQnan(const mask_type &m) {
875  data0.setQnan(Split::lo(m));
876  data1.setQnan(Split::hi(m));
877  }
878 
880  template <typename Op, typename... Args>
881  static Vc_INTRINSIC fixed_size_simd<T, N> fromOperation(Op op, Args &&... args)
882  {
884  storage_type0::fromOperation(op, Split::lo(args)...), // no forward here - it
885  // could move and thus
886  // break the next line
887  storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
888  return r;
889  }
890 
892  template <typename Op, typename... Args>
893  static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
894  {
895  storage_type0::callOperation(op, Split::lo(args)...);
896  storage_type1::callOperation(op, Split::hi(std::forward<Args>(args))...);
897  }
898 
899 
900  template <typename U, typename... Args> Vc_INTRINSIC void load(const U *mem, Args &&... args)
901  {
902  data0.load(mem, Split::lo(args)...); // no forward here - it could move and thus
903  // break the next line
904  data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
905  }
906 
907  template <typename U, typename... Args> Vc_INTRINSIC void store(U *mem, Args &&... args) const
908  {
909  data0.store(mem, Split::lo(args)...); // no forward here - it could move and thus
910  // break the next line
911  data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
912  }
913 
914  Vc_INTRINSIC mask_type operator!() const
915  {
916  return {!data0, !data1};
917  }
918 
919  Vc_INTRINSIC fixed_size_simd<T, N> operator-() const
920  {
921  return {-data0, -data1};
922  }
923 
925  Vc_INTRINSIC fixed_size_simd<T, N> operator+() const { return *this; }
926 
927  Vc_INTRINSIC fixed_size_simd<T, N> operator~() const
928  {
929  return {~data0, ~data1};
930  }
931 
932  // left/right shift operators {{{2
933  template <typename U,
934  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
935  Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator<<(U x) const
936  {
937  return {data0 << x, data1 << x};
938  }
939  template <typename U,
940  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
941  Vc_INTRINSIC fixed_size_simd<T, N> &operator<<=(U x)
942  {
943  data0 <<= x;
944  data1 <<= x;
945  return *this;
946  }
947  template <typename U,
948  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
949  Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator>>(U x) const
950  {
951  return {data0 >> x, data1 >> x};
952  }
953  template <typename U,
954  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
955  Vc_INTRINSIC fixed_size_simd<T, N> &operator>>=(U x)
956  {
957  data0 >>= x;
958  data1 >>= x;
959  return *this;
960  }
961 
962  // binary operators {{{2
963 #define Vc_BINARY_OPERATOR_(op) \
964  Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs) \
965  { \
966  data0 op## = rhs.data0; \
967  data1 op## = rhs.data1; \
968  return *this; \
969  }
970  Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
971  Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
972  Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
973 #undef Vc_BINARY_OPERATOR_
974 
975  // operator[] {{{2
978 
979 private:
980  friend reference;
981  Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
982  {
983  return reinterpret_cast<const alias_type *>(&o)[i];
984  }
985  template <typename U>
986  Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
987  noexcept(std::declval<value_type &>() = v))
988  {
989  reinterpret_cast<alias_type *>(&o)[i] = v;
990  }
991 
992 public:
994 
1000  Vc_INTRINSIC reference operator[](size_t i) noexcept
1001  {
1002  static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
1003  return {*this, int(i)};
1004  }
1005 
1007  Vc_INTRINSIC value_type operator[](size_t index) const noexcept
1008  {
1009  return get(*this, int(index));
1010  }
1012 
1013  // operator(){{{2
1015  Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
1016  const mask_type &mask)
1017  {
1018  return {*this, mask};
1019  }
1020 
1022  Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k) //{{{2
1023  {
1024  data0.assign(v.data0, internal_data0(k));
1025  data1.assign(v.data1, internal_data1(k));
1026  }
1027 
1028  // reductions {{{2
1029 #define Vc_REDUCTION_FUNCTION_(name_, binary_fun_, scalar_fun_) \
1030 private: \
1031  template <typename ForSfinae = void> \
1032  Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
1033  storage_type0::Size == storage_type1::Size, \
1034  value_type> name_##_impl() const \
1035  { \
1036  return binary_fun_(data0, data1).name_(); \
1037  } \
1038  \
1039  template <typename ForSfinae = void> \
1040  Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
1041  storage_type0::Size != storage_type1::Size, \
1042  value_type> name_##_impl() const \
1043  { \
1044  return scalar_fun_(data0.name_(), data1.name_()); \
1045  } \
1046  \
1047 public: \
1048  \
1049  Vc_INTRINSIC value_type name_() const { return name_##_impl(); } \
1050  \
1051  Vc_INTRINSIC value_type name_(const mask_type &mask) const \
1052  { \
1053  if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) { \
1054  return data1.name_(Split::hi(mask)); \
1055  } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) { \
1056  return data0.name_(Split::lo(mask)); \
1057  } else { \
1058  return scalar_fun_(data0.name_(Split::lo(mask)), \
1059  data1.name_(Split::hi(mask))); \
1060  } \
1061  } \
1062  Vc_NOTHING_EXPECTING_SEMICOLON
1063  Vc_REDUCTION_FUNCTION_(min, Vc::min, std::min);
1064  Vc_REDUCTION_FUNCTION_(max, Vc::max, std::max);
1065  Vc_REDUCTION_FUNCTION_(product, internal::product_helper_, internal::product_helper_);
1066  Vc_REDUCTION_FUNCTION_(sum, internal::sum_helper_, internal::sum_helper_);
1067 #undef Vc_REDUCTION_FUNCTION_
1068  Vc_INTRINSIC Vc_PURE fixed_size_simd<T, N> partialSum() const //{{{2
1070  {
1071  auto ps0 = data0.partialSum();
1072  auto tmp = data1;
1073  tmp[0] += ps0[data0.size() - 1];
1074  return {std::move(ps0), tmp.partialSum()};
1075  }
1076 
1077  // apply {{{2
1079  template <typename F> inline fixed_size_simd<T, N> apply(F &&f) const
1080  {
1081  return {data0.apply(f), data1.apply(f)};
1082  }
1084  template <typename F>
1085  inline fixed_size_simd<T, N> apply(F &&f, const mask_type &k) const
1086  {
1087  return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))};
1088  }
1089 
    // shifted {{{2
    /// Returns a copy with all elements shifted by \p amount positions; vacated
    /// positions are zero-filled (amount >= Size or amount <= -Size yields Zero()).
    // NOTE(review): direction follows Vc's Vector::shifted convention — confirm
    // against the Vector<T> documentation.
    inline fixed_size_simd<T, N> shifted(int amount) const
    {
        constexpr int SSize = Size;
        constexpr int SSize0 = storage_type0::Size;
        constexpr int SSize1 = storage_type1::Size;
        if (amount == 0) {
            return *this;
        }
        if (amount < 0) {
            if (amount > -SSize0) {
                // Both halves shift; data1 pulls its missing elements in from data0.
                return {data0.shifted(amount), data1.shifted(amount, data0)};
            }
            if (amount == -SSize0) {
                // data0 moves entirely into the data1 slot.
                return {storage_type0(0), simd_cast<storage_type1>(data0)};
            }
            if (amount < -SSize0) {
                return {storage_type0(0), simd_cast<storage_type1>(data0.shifted(
                                              amount + SSize0))};
            }
            return Zero();
        } else {
            if (amount >= SSize) {
                return Zero();
            } else if (amount >= SSize0) {
                // data1 moves entirely into the data0 slot (possibly shifted further).
                return {
                    simd_cast<storage_type0>(data1).shifted(amount - SSize0),
                    storage_type1(0)};
            } else if (amount >= SSize1) {
                return {data0.shifted(amount, data1), storage_type1(0)};
            } else {
                return {data0.shifted(amount, data1), data1.shifted(amount)};
            }
        }
    }
1126 
1127  template <std::size_t NN>
1128  inline enable_if<
1129  !(std::is_same<storage_type0, storage_type1>::value && // not bisectable
1130  N == NN),
1132  shifted(int amount, const SimdArray<value_type, NN> &shiftIn) const
1133  {
1134  constexpr int SSize = Size;
1135  if (amount < 0) {
1136  return fixed_size_simd<T, N>([&](int i) -> value_type {
1137  i += amount;
1138  if (i >= 0) {
1139  return operator[](i);
1140  } else if (i >= -SSize) {
1141  return shiftIn[i + SSize];
1142  }
1143  return 0;
1144  });
1145  }
1146  return fixed_size_simd<T, N>([&](int i) -> value_type {
1147  i += amount;
1148  if (i < SSize) {
1149  return operator[](i);
1150  } else if (i < 2 * SSize) {
1151  return shiftIn[i - SSize];
1152  }
1153  return 0;
1154  });
1155  }
1156 
private:
    // workaround for MSVC not understanding the simpler and shorter expression of the boolean
    // expression directly in the enable_if below
    // True when both storage halves have the same type and the shift-in argument
    // has the same width NN == N as this SimdArray.
    template <std::size_t NN> struct bisectable_shift
        : public std::integral_constant<bool,
                                        std::is_same<storage_type0, storage_type1>::value && // bisectable
                                        N == NN>
    {
    };
1166 
1167 public:
1168  template <std::size_t NN>
1170  enable_if<bisectable_shift<NN>::value, int> amount,
1171  const SimdArray<value_type, NN> &shiftIn) const
1172  {
1173  constexpr int SSize = Size;
1174  if (amount < 0) {
1175  if (amount > -static_cast<int>(storage_type0::Size)) {
1176  return {data0.shifted(amount, internal_data1(shiftIn)),
1177  data1.shifted(amount, data0)};
1178  }
1179  if (amount == -static_cast<int>(storage_type0::Size)) {
1180  return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
1181  }
1182  if (amount > -SSize) {
1183  return {
1184  internal_data1(shiftIn)
1185  .shifted(amount + static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
1186  data0.shifted(amount + static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
1187  }
1188  if (amount == -SSize) {
1189  return shiftIn;
1190  }
1191  if (amount > -2 * SSize) {
1192  return shiftIn.shifted(amount + SSize);
1193  }
1194  }
1195  if (amount == 0) {
1196  return *this;
1197  }
1198  if (amount < static_cast<int>(storage_type0::Size)) {
1199  return {data0.shifted(amount, data1),
1200  data1.shifted(amount, internal_data0(shiftIn))};
1201  }
1202  if (amount == static_cast<int>(storage_type0::Size)) {
1203  return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
1204  }
1205  if (amount < SSize) {
1206  return {data1.shifted(amount - static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
1207  internal_data0(shiftIn)
1208  .shifted(amount - static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
1209  }
1210  if (amount == SSize) {
1211  return shiftIn;
1212  }
1213  if (amount < 2 * SSize) {
1214  return shiftIn.shifted(amount - SSize);
1215  }
1216  return Zero();
1217  }
1218 
1219  // rotated {{{2
1221  Vc_INTRINSIC fixed_size_simd<T, N> rotated(int amount) const
1222  {
1223  amount %= int(size());
1224  if (amount == 0) {
1225  return *this;
1226  } else if (amount < 0) {
1227  amount += size();
1228  }
1229 
1230 #ifdef Vc_MSVC
1231  // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use store
1232  // ->
1233  // load to implement the function instead.
1234  alignas(MemoryAlignment) T tmp[N + data0.size()];
1235  data0.store(&tmp[0], Vc::Aligned);
1236  data1.store(&tmp[data0.size()], Vc::Aligned);
1237  data0.store(&tmp[N], Vc::Unaligned);
1239  r.data0.load(&tmp[amount], Vc::Unaligned);
1240  r.data1.load(&tmp[(amount + data0.size()) % size()], Vc::Unaligned);
1241  return r;
1242 #else
1243  auto &&d0cvtd = simd_cast<storage_type1>(data0);
1244  auto &&d1cvtd = simd_cast<storage_type0>(data1);
1245  constexpr int size0 = storage_type0::size();
1246  constexpr int size1 = storage_type1::size();
1247 
1248  if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
1249  return {std::move(d1cvtd), std::move(d0cvtd)};
1250  } else if (amount < size1) {
1251  return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
1252  } else if (amount == size1) {
1253  return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
1254  } else if (int(size()) - amount < size1) {
1255  return {data0.shifted(amount - int(size()), d1cvtd.shifted(size1 - size0)),
1256  data1.shifted(amount - int(size()), data0.shifted(size0 - size1))};
1257  } else if (int(size()) - amount == size1) {
1258  return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
1259  simd_cast<storage_type1>(data0.shifted(size0 - size1))};
1260  } else if (amount <= size0) {
1261  return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1262  simd_cast<storage_type1>(data0.shifted(amount - size1))};
1263  } else {
1264  return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1265  simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
1266  }
1267  return *this;
1268 #endif
1269  }
1270 
1271  // interleaveLow/-High {{{2
1273  Vc_INTRINSIC fixed_size_simd<T, N> interleaveLow(const SimdArray &x) const
1274  {
1275  // return data0[0], x.data0[0], data0[1], x.data0[1], ...
1276  return {data0.interleaveLow(x.data0),
1277  simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
1278  }
1280  Vc_INTRINSIC fixed_size_simd<T, N> interleaveHigh(const SimdArray &x) const
1281  {
1282  return interleaveHighImpl(
1283  x,
1284  std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
1285  }
1286 
1287 private:
1289  Vc_INTRINSIC fixed_size_simd<T, N> interleaveHighImpl(const SimdArray &x, std::true_type) const
1290  {
1291  return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
1292  }
    /// interleaveHigh for unequally sized halves: the result's low part is
    /// stitched from the tail of data0's high interleave plus the head of data1's
    /// low interleave, shifted in via the two-argument shifted().
    inline fixed_size_simd<T, N> interleaveHighImpl(const SimdArray &x, std::false_type) const
    {
        return {data0.interleaveHigh(x.data0)
                    .shifted(storage_type1::Size,
                             simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
                data1.interleaveHigh(x.data1)};
    }
1301 
public:
    /// Returns a copy with the elements in reverse order. //{{{2
    inline fixed_size_simd<T, N> reversed() const
    {
        if (std::is_same<storage_type0, storage_type1>::value) {
            // Equal halves: reverse each half and swap them.
            return {simd_cast<storage_type0>(data1).reversed(),
                    simd_cast<storage_type1>(data0).reversed()};
        } else {
#ifdef Vc_MSVC
            // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use
            // store
            // -> load to implement the function instead.
            alignas(MemoryAlignment) T tmp[N];
            data1.reversed().store(&tmp[0], Vc::Aligned);
            data0.reversed().store(&tmp[data1.size()], Vc::Unaligned);
            return fixed_size_simd<T, N>{&tmp[0], Vc::Aligned};
#else
            // Unequal halves: stitch the reversal together via shifts and casts.
            return {data0.shifted(storage_type1::Size, data1).reversed(),
                    simd_cast<storage_type1>(data0.reversed().shifted(
                        storage_type0::Size - storage_type1::Size))};
#endif
        }
    }
1326  inline fixed_size_simd<T, N> sorted() const //{{{2
1327  {
1328  return sortedImpl(
1329  std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
1330  }
1331 
1333  Vc_INTRINSIC fixed_size_simd<T, N> sortedImpl(std::true_type) const
1334  {
1335 #ifdef Vc_DEBUG_SORTED
1336  std::cerr << "-- " << data0 << data1 << '\n';
1337 #endif
1338  const auto a = data0.sorted();
1339  const auto b = data1.sorted().reversed();
1340  const auto lo = Vc::min(a, b);
1341  const auto hi = Vc::max(a, b);
1342  return {lo.sorted(), hi.sorted()};
1343  }
1344 
1346  Vc_INTRINSIC fixed_size_simd<T, N> sortedImpl(std::false_type) const
1347  {
1348  using SortableArray =
1350  auto sortable = simd_cast<SortableArray>(*this);
1351  for (std::size_t i = Size; i < SortableArray::Size; ++i) {
1352  using limits = std::numeric_limits<value_type>;
1353  if (limits::has_infinity) {
1354  sortable[i] = limits::infinity();
1355  } else {
1356  sortable[i] = std::numeric_limits<value_type>::max();
1357  }
1358  }
1359  return simd_cast<fixed_size_simd<T, N>>(sortable.sorted());
1360 
1361  /* The following implementation appears to be less efficient. But this may need further
1362  * work.
1363  const auto a = data0.sorted();
1364  const auto b = data1.sorted();
1365 #ifdef Vc_DEBUG_SORTED
1366  std::cerr << "== " << a << b << '\n';
1367 #endif
1368  auto aIt = Vc::begin(a);
1369  auto bIt = Vc::begin(b);
1370  const auto aEnd = Vc::end(a);
1371  const auto bEnd = Vc::end(b);
1372  return SimdArray::generate([&](std::size_t) {
1373  if (aIt == aEnd) {
1374  return *(bIt++);
1375  }
1376  if (bIt == bEnd) {
1377  return *(aIt++);
1378  }
1379  if (*aIt < *bIt) {
1380  return *(aIt++);
1381  } else {
1382  return *(bIt++);
1383  }
1384  });
1385  */
1386  }
1387 
1390 
    /// Number of elements; out-of-class definition follows below the class.
    static constexpr std::size_t Size = size();

    /// \deprecated use exponent(x) instead
    Vc_DEPRECATED("use exponent(x) instead")
    Vc_INTRINSIC fixed_size_simd<T, N> exponent() const
    {
        return {exponent(data0), exponent(data1)};
    }

    /// \deprecated use isnegative(x) instead
    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
    {
        return {isnegative(data0), isnegative(data1)};
    }

    /// \deprecated use copysign(x, y) instead
    Vc_DEPRECATED("use copysign(x, y) instead")
    Vc_INTRINSIC fixed_size_simd<T, N> copySign(const SimdArray &x) const
    {
        return {Vc::copysign(data0, x.data0),
                Vc::copysign(data1, x.data1)};
    }
1416 
    // internal_data0/1 {{{2
    // Grant the free accessor functions (defined below the class) access to the
    // private storage halves.
    friend storage_type0 &internal_data0<>(SimdArray &x);
    friend storage_type1 &internal_data1<>(SimdArray &x);
    friend const storage_type0 &internal_data0<>(const SimdArray &x);
    friend const storage_type1 &internal_data1<>(const SimdArray &x);

    /// Constructs a SimdArray directly from its two storage halves. //{{{2
    Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y)
        : data0(std::move(x)), data1(std::move(y))
    {
    }

    // operator new/delete overloads guaranteeing the alignment the members need.
    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type0));

private: //{{{2
    // The alignas attribute attached to the class declaration above is ignored by ICC
    // 17.0.0 (at least). So just move the alignas attribute down here where it works for
    // all compilers.
    alignas(static_cast<std::size_t>(
        Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(V) /
                                 V::size()>::value)) storage_type0 data0;
    storage_type1 data1;
};
#undef Vc_CURRENT_CLASS_NAME
// Out-of-class definitions for the static constexpr data members (required for
// ODR-use before C++17 inline variables).
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::Size;
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::MemoryAlignment;
1445 
1446 // gatherImplementation {{{2
1447 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1448 template <typename MT, typename IT>
1450  const IT &indexes)
1451 {
1452  data0.gather(mem, Split::lo(Common::Operations::gather(), indexes));
1453  data1.gather(mem, Split::hi(Common::Operations::gather(), indexes));
1454 }
1455 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1456 template <typename MT, typename IT>
1458  const IT &indexes,
1459  MaskArgument mask)
1460 {
1461  data0.gather(mem, Split::lo(Common::Operations::gather(), indexes), Split::lo(mask));
1462  data1.gather(mem, Split::hi(Common::Operations::gather(), indexes), Split::hi(mask));
1463 }
1464 
1465 // scatterImplementation {{{2
1466 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1467 template <typename MT, typename IT>
1469  IT &&indexes) const
1470 {
1471  data0.scatter(mem, Split::lo(Common::Operations::gather(),
1472  indexes)); // don't forward indexes - it could move and
1473  // thus break the next line
1474  data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
1475 }
1476 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1477 template <typename MT, typename IT>
1479  IT &&indexes, MaskArgument mask) const
1480 {
1481  data0.scatter(mem, Split::lo(Common::Operations::gather(), indexes),
1482  Split::lo(mask)); // don't forward indexes - it could move and
1483  // thus break the next line
1484  data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
1485  Split::hi(mask));
1486 }
1487 
1488 // internal_data0/1 (SimdArray) {{{1
1490 template <typename T, std::size_t N, typename V, std::size_t M>
1491 #ifndef Vc_MSVC
1492 Vc_INTRINSIC
1493 #endif
1494 typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
1496 {
1497  return x.data0;
1498 }
1500 template <typename T, std::size_t N, typename V, std::size_t M>
1501 #ifndef Vc_MSVC
1502 Vc_INTRINSIC
1503 #endif
1504 typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
1506 {
1507  return x.data1;
1508 }
/// Returns a const reference to the first storage half of \p x.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    const SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
/// Returns a const reference to the second storage half of \p x.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    const SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
1529 
// MSVC workaround for SimdArray(storage_type0, storage_type1) ctor{{{1
// MSVC sometimes stores x to data1. By first broadcasting 0 and then assigning y
// in the body the bug is suppressed.
// NOTE(review): the explicit specialization's signature lines appear to have been
// dropped from this copy of the file — only `template <>` and the
// member-initializer list survive. Restore from the upstream sources before
// building with MSVC + SSE.
#if defined Vc_MSVC && defined Vc_IMPL_SSE && !defined Vc_IMPL_AVX
template <>
    : data0(x), data1(0)
{
    data1 = y;
}
#endif
1542 
// binary operators {{{
namespace Detail
{
// Homogeneous fixed_size_simd ⨉ fixed_size_simd operators. Two overloads per
// operator: the atomic case forwards to the single underlying vector, the
// non-atomic case recurses into the two storage halves.
#define Vc_FIXED_OP(op)                                                                  \
    template <class T, int N,                                                            \
              class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type>   \
    fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a,                    \
                                      const fixed_size_simd<T, N> &b)                    \
    {                                                                                    \
        return {private_init, internal_data(a) op internal_data(b)};                     \
    }                                                                                    \
    template <class T, int N,                                                            \
              class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type,  \
              class = T>                                                                 \
    fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a,                    \
                                      const fixed_size_simd<T, N> &b)                    \
    {                                                                                    \
        return {internal_data0(a) op internal_data0(b),                                  \
                internal_data1(a) op internal_data1(b)};                                 \
    }
Vc_ALL_ARITHMETICS(Vc_FIXED_OP);
Vc_ALL_BINARY(Vc_FIXED_OP);
Vc_ALL_SHIFTS(Vc_FIXED_OP);
#undef Vc_FIXED_OP
// Same structure for the comparison operators; these return the mask type.
#define Vc_FIXED_OP(op)                                                                  \
    template <class T, int N,                                                            \
              class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type>   \
    fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a,               \
                                           const fixed_size_simd<T, N> &b)               \
    {                                                                                    \
        return {private_init, internal_data(a) op internal_data(b)};                     \
    }                                                                                    \
    template <class T, int N,                                                            \
              class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type,  \
              class = T>                                                                 \
    fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a,               \
                                           const fixed_size_simd<T, N> &b)               \
    {                                                                                    \
        return {internal_data0(a) op internal_data0(b),                                  \
                internal_data1(a) op internal_data1(b)};                                 \
    }
Vc_ALL_COMPARES(Vc_FIXED_OP);
#undef Vc_FIXED_OP
} // namespace Detail
1587 
1588 // }}}
// binary operators {{{1
namespace result_vector_type_internal
{
// Strips const/volatile qualifiers and references.
template <typename T>
using remove_cvref = typename std::remove_cv<typename std::remove_reference<T>::type>::type;

// True for integral types whose use in a mixed operator could force an implicit
// demotion: wider than int, or long/unsigned long (which may be 64 bit).
template <typename T>
using is_integer_larger_than_int = std::integral_constant<
    bool, std::is_integral<T>::value &&(sizeof(T) > sizeof(int) ||
                                        std::is_same<T, long>::value ||
                                        std::is_same<T, unsigned long>::value)>;

// NOTE(review): several template parameter lines of this declaration (the
// deduced size N and the opening of the boolean condition) appear to have been
// dropped from this copy of the file; restore from the upstream sources.
template <
    typename L, typename R,
    Traits::isSimdArray<R>::value) && // one of the operands must be a SimdArray
    !(Traits::is_fixed_size_simd<L>::value && // if both are fixed_size, use
      Traits::is_fixed_size_simd<R>::value) && // common/operators.h
    ((std::is_arithmetic<remove_cvref<L>>::value && // one of the operands is a
      !is_integer_larger_than_int<remove_cvref<L>>::value) || // scalar type
     (std::is_arithmetic<remove_cvref<R>>::value &&
      !is_integer_larger_than_int<remove_cvref<R>>::value) ||
     // or one of the operands is Vector<T> with Vector<T>::size() ==
     // SimdArray::size()
struct evaluate;

template <typename L, typename R, std::size_t N> struct evaluate<L, R, N, true>
{
private:
    using LScalar = Traits::entry_type_of<L>;
    using RScalar = Traits::entry_type_of<R>;

    template <bool B, typename T, typename F>
    using conditional = typename std::conditional<B, T, F>::type;

public:
    // In principle we want the exact same rules for SimdArray<T> ⨉ SimdArray<U> as the standard
    // defines for T ⨉ U. BUT: short ⨉ short returns int (because all integral types smaller than
    // int are promoted to int before any operation). This would imply that SIMD types with integral
    // types smaller than int are more or less useless - and you could use SimdArray<int> from the
    // start. Therefore we special-case those operations where the scalar type of both operands is
    // integral and smaller than int.
    // In addition, there is no generic support for 64-bit int SIMD types. Therefore
    // promotion to a 64-bit integral type (including `long` because it can potentially have 64
    // bits) also is not done. But if one of the operands is a scalar type that is larger than int
    // then the operator is disabled altogether. We do not want an implicit demotion.
    using type = fixed_size_simd<
        conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
                     sizeof(LScalar) < sizeof(int) &&
                     sizeof(RScalar) < sizeof(int)),
                    conditional<(sizeof(LScalar) == sizeof(RScalar)),
                                conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
                                conditional<(sizeof(LScalar) > sizeof(RScalar)), LScalar, RScalar>>,
                    decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
        N>;
};

} // namespace result_vector_type_internal

// The promoted fixed_size_simd type used for mixed L ⨉ R binary operators.
template <typename L, typename R>
using result_vector_type = typename result_vector_type_internal::evaluate<L, R>::type;
1653 
// Mixed-type operators (SimdArray ⨉ scalar / Vector<T>): both operands are
// converted to the promoted result_vector_type and forwarded to Vc::Detail.
#define Vc_BINARY_OPERATORS_(op_)                                                        \
                                                                                         \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC result_vector_type<L, R> operator op_(L &&lhs, R &&rhs)                 \
    {                                                                                    \
        using Return = result_vector_type<L, R>;                                         \
        return Vc::Detail::operator op_(                                                 \
            static_cast<const Return &>(std::forward<L>(lhs)),                           \
            static_cast<const Return &>(std::forward<R>(rhs)));                          \
    }

Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_);
Vc_ALL_BINARY(Vc_BINARY_OPERATORS_);
#undef Vc_BINARY_OPERATORS_
// Mixed-type comparisons: same promotion, but the result is the mask type.
#define Vc_BINARY_OPERATORS_(op_)                                                        \
                                                                                         \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op_(L &&lhs,      \
                                                                           R &&rhs)      \
    {                                                                                    \
        using Promote = result_vector_type<L, R>;                                        \
        return Promote(std::forward<L>(lhs)) op_ Promote(std::forward<R>(rhs));          \
    }

Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_);
#undef Vc_BINARY_OPERATORS_
1716 
// math functions {{{1
// Forwards unary math functions element-wise via fromOperation; one overload for
// SimdArray and one for fixed_size_simd.
#define Vc_FORWARD_UNARY_OPERATOR(name_)                                                 \
                                                                                         \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x)                   \
    {                                                                                    \
        return fixed_size_simd<T, N>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    template <class T, int N>                                                            \
    fixed_size_simd<T, N> name_(const fixed_size_simd<T, N> &x)                          \
    {                                                                                    \
        return fixed_size_simd<T, N>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Same as above for predicates whose result is a mask (isnan, isinf, ...).
#define Vc_FORWARD_UNARY_BOOL_OPERATOR(name_)                                            \
                                                                                         \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline fixed_size_simd_mask<T, N> name_(const SimdArray<T, N, V, M> &x)              \
    {                                                                                    \
        return fixed_size_simd_mask<T, N>::fromOperation(                                \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    template <class T, int N>                                                            \
    fixed_size_simd_mask<T, N> name_(const fixed_size_simd<T, N> &x)                     \
    {                                                                                    \
        return fixed_size_simd_mask<T, N>::fromOperation(                                \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Binary element-wise math functions (atan2, copysign, min, max).
#define Vc_FORWARD_BINARY_OPERATOR(name_)                                                \
                                                                                         \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x,                   \
                                       const SimdArray<T, N, V, M> &y)                   \
    {                                                                                    \
        return fixed_size_simd<T, N>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x, y);                                \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_FORWARD_UNARY_OPERATOR(abs);
Vc_FORWARD_UNARY_OPERATOR(asin);
Vc_FORWARD_UNARY_OPERATOR(atan);
Vc_FORWARD_BINARY_OPERATOR(atan2);
Vc_FORWARD_UNARY_OPERATOR(ceil);
Vc_FORWARD_BINARY_OPERATOR(copysign);
Vc_FORWARD_UNARY_OPERATOR(cos);
Vc_FORWARD_UNARY_OPERATOR(exp);
Vc_FORWARD_UNARY_OPERATOR(exponent);
Vc_FORWARD_UNARY_OPERATOR(floor);
1777 template <typename T, std::size_t N>
1779  const SimdArray<T, N> &c)
1780 {
1781  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_fma(), a, b, c);
1782 }
// Element-wise classification predicates; each returns a mask.
Vc_FORWARD_UNARY_BOOL_OPERATOR(isfinite);
Vc_FORWARD_UNARY_BOOL_OPERATOR(isinf);
Vc_FORWARD_UNARY_BOOL_OPERATOR(isnan);
Vc_FORWARD_UNARY_BOOL_OPERATOR(isnegative);
1788 template <typename T, std::size_t N>
1790 {
1791  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_frexp(), x, e);
1792 }
1794 template <typename T, std::size_t N>
1796 {
1797  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_ldexp(), x, e);
1798 }
// Element-wise transcendental and rounding functions.
Vc_FORWARD_UNARY_OPERATOR(log);
Vc_FORWARD_UNARY_OPERATOR(log10);
Vc_FORWARD_UNARY_OPERATOR(log2);
Vc_FORWARD_UNARY_OPERATOR(reciprocal);
Vc_FORWARD_UNARY_OPERATOR(round);
Vc_FORWARD_UNARY_OPERATOR(rsqrt);
Vc_FORWARD_UNARY_OPERATOR(sin);
1807 template <typename T, std::size_t N>
1809 {
1810  SimdArray<T, N>::callOperation(Common::Operations::Forward_sincos(), x, sin, cos);
1811 }
Vc_FORWARD_UNARY_OPERATOR(sqrt);
Vc_FORWARD_UNARY_OPERATOR(trunc);
Vc_FORWARD_BINARY_OPERATOR(min);
Vc_FORWARD_BINARY_OPERATOR(max);
// The forwarding macros are local to this section.
#undef Vc_FORWARD_UNARY_OPERATOR
#undef Vc_FORWARD_UNARY_BOOL_OPERATOR
#undef Vc_FORWARD_BINARY_OPERATOR
1820 
// simd_cast {{{1
// MSVC needs distinct dummy default parameters to keep otherwise-identical
// simd_cast overloads apart; other compilers need none.
#ifdef Vc_MSVC
#define Vc_DUMMY_ARG0 , int = 0
#define Vc_DUMMY_ARG1 , long = 0
#define Vc_DUMMY_ARG2 , short = 0
#define Vc_DUMMY_ARG3 , char = '0'
#define Vc_DUMMY_ARG4 , unsigned = 0u
#define Vc_DUMMY_ARG5 , unsigned short = 0u
#else
#define Vc_DUMMY_ARG0
#define Vc_DUMMY_ARG1
#define Vc_DUMMY_ARG2
#define Vc_DUMMY_ARG3
#define Vc_DUMMY_ARG4
#define Vc_DUMMY_ARG5
#endif // Vc_MSVC
1837 
1838 // simd_cast_impl_smaller_input {{{2
1839 // The following function can be implemented without the sizeof...(From) overload.
1840 // However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
1841 // function in two works around the issue.
1842 template <typename Return, std::size_t N, typename T, typename... From>
1843 Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return>
1844 simd_cast_impl_smaller_input(const From &... xs, const T &last)
1845 {
1846  Return r = simd_cast<Return>(xs...);
1847  for (size_t i = 0; i < N; ++i) {
1848  r[i + N * sizeof...(From)] = static_cast<typename Return::EntryType>(last[i]);
1849  }
1850  return r;
1851 }
1852 template <typename Return, std::size_t N, typename T>
1853 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(const T &last)
1854 {
1855  Return r = Return();
1856  for (size_t i = 0; i < N; ++i) {
1857  r[i] = static_cast<typename Return::EntryType>(last[i]);
1858  }
1859  return r;
1860 }
1861 template <typename Return, std::size_t N, typename T, typename... From>
1862 Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
1863  const From &... xs, const T &last)
1864 {
1865  Return r = simd_cast<Return>(xs...);
1866  for (size_t i = N * sizeof...(From); i < Return::Size; ++i) {
1867  r[i] = static_cast<typename Return::EntryType>(last[i - N * sizeof...(From)]);
1868  }
1869  return r;
1870 }
1871 template <typename Return, std::size_t N, typename T>
1872 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(const T &last)
1873 {
1874  Return r = Return();
1875  for (size_t i = 0; i < Return::size(); ++i) {
1876  r[i] = static_cast<typename Return::EntryType>(last[i]);
1877  }
1878  return r;
1879 }
1880 
// simd_cast_without_last (declaration) {{{2
// Casts all arguments except the trailing one (definition further down).
template <typename Return, typename T, typename... From>
Vc_INTRINSIC_L Vc_CONST_L Return
    simd_cast_without_last(const From &... xs, const T &) Vc_INTRINSIC_R Vc_CONST_R;

// are_all_types_equal {{{2
// Variadic trait: true iff all given types are the same type.
template <typename... Ts> struct are_all_types_equal;
template <typename T>
struct are_all_types_equal<T> : public std::integral_constant<bool, true>
{
};
template <typename T0, typename T1, typename... Ts>
struct are_all_types_equal<T0, T1, Ts...>
    : public std::integral_constant<
          bool, std::is_same<T0, T1>::value && are_all_types_equal<T1, Ts...>::value>
{
};
1898 
// simd_cast_interleaved_argument_order (declarations) {{{2
// Casts a... and b... with the arguments interleaved pairwise (a0, b0, a1, b1, ...).
template <typename Return, typename... Ts>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b);

// simd_cast_with_offset (declarations and one impl) {{{2
// offset == 0 {{{3
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
    simd_cast_with_offset(const From &x, const Froms &... xs);
// offset > 0 && offset divisible by Return::Size {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
    simd_cast_with_offset(const From &x);
// offset > 0 && offset NOT divisible && Return is non-atomic simd(mask)array {{{3
// NOTE(review): several condition lines of the following two enable_if
// expressions appear to have been dropped from this copy of the file (the
// parenthesization is visibly unbalanced); restore from the upstream sources.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               !Traits::isAtomicSimdArray<Return>::value) ||
               !Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x);
// offset > 0 && offset NOT divisible && Return is atomic simd(mask)array {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               Traits::isAtomicSimdArray<Return>::value) ||
               Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x);
// offset > first argument (drops first arg) {{{3
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
    simd_cast_with_offset(const From &, const Froms &... xs)
{
    // The first argument is entirely below the offset: recurse with it dropped.
    return simd_cast_with_offset<Return, offset - From::Size>(xs...);
}

// offset > first and only argument (returns Zero) {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
    const From &)
{
    return Return(0);
}

// first_type_of {{{2
// Yields the first type of a parameter pack.
template <typename T, typename... Ts> struct first_type_of_impl
{
    using type = T;
};
template <typename... Ts> using first_type_of = typename first_type_of_impl<Ts...>::type;
1977 
// simd_cast_drop_arguments (declarations) {{{2
// Casts as many leading arguments as fit into Return and drops the rest.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
    simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From);
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From);
2000 
namespace
{
#ifdef Vc_DEBUG_SIMD_CAST
void debugDoNothing(const std::initializer_list<void *> &) {}
// Prints all arguments (comma-separated) to std::cerr; used to trace simd_cast
// overload dispatch when Vc_DEBUG_SIMD_CAST is defined.
template <typename T0, typename... Ts>
inline void vc_debug_(const char *prefix, const char *suffix, const T0 &arg0,
                      const Ts &... args)
{
    std::cerr << prefix << arg0;
    debugDoNothing({&(std::cerr << ", " << args)...});
    std::cerr << suffix;
}
#else
// No-op in normal builds so the call sites can stay unconditional.
template <typename T0, typename... Ts>
Vc_INTRINSIC void vc_debug_(const char *, const char *, const T0 &, const Ts &...)
{
}
#endif
} // unnamed namespace
2020 
// is_less trait{{{2
/// Compile-time comparison: true iff A < B.
template <size_t A, size_t B>
struct is_less
    : public std::conditional<(A < B), std::true_type, std::false_type>::type {
};
2025 
// is_power_of_2 trait{{{2
/// Compile-time power-of-two test via the classic N & (N - 1) trick.
/// (Also true for N == 0, matching the original expression.)
template <size_t N>
struct is_power_of_2
    : public std::integral_constant<bool, (N & (N - 1)) == 0> {
};
2030 
// simd_cast<T>(xs...) to SimdArray/-mask {{{2
// simd_cast overloads converting one or more native Vc::Vector/Vc::Mask
// arguments into a SimdArray/SimdMaskArray Return type. Four cases:
// {1} atomic Return, every input contributes;
// {2} atomic Return, the last input is superfluous and dropped;
// {3} non-atomic Return, both storage halves are filled from the inputs;
// {4} non-atomic Return, inputs fill only the left half; right half is zero.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&     \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value &&                      \
         !detail::is_fixed_size_abi<A>::value),                                          \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{1}(", ")\n", x, xs...);                                     \
        return {private_init, simd_cast<typename Return::storage_type>(x, xs...)};       \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         !is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&    \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value &&                      \
         !detail::is_fixed_size_abi<A>::value),                                          \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{2}(", ")\n", x, xs...);                                     \
        return {simd_cast_without_last<Return, NativeType_<T, A>, Froms...>(x, xs...)};  \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   is_less<Common::left_size<Return::Size>(),                            \
                           NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&    \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value &&            \
                   !detail::is_fixed_size_abi<A>::value),                                \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x, Froms... xs)                                      \
    {                                                                                    \
        vc_debug_("simd_cast{3}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast_drop_arguments<R0, Froms...>(x, xs...),                        \
                simd_cast_with_offset<R1, R0::Size>(x, xs...)};                          \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   !is_less<Common::left_size<Return::Size>(),                           \
                            NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&   \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value &&            \
                   !detail::is_fixed_size_abi<A>::value),                                \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x, Froms... xs)                                      \
    {                                                                                    \
        vc_debug_("simd_cast{4}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast<R0>(x, xs...), R1(0)};                                         \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2095 
// simd_cast<SimdArray/-mask, offset>(V) {{{2
// simd_cast overloads extracting the chunk starting at entry
// `offset * Return::Size` of a native vector/mask into a SimdArray/-mask.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    /* SIMD Vector/Mask to atomic SimdArray/simdmaskarray */                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<Traits::isAtomic##SimdArrayType_<Return>::value, Return>               \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG0)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x);                       \
        return {private_init, simd_cast<typename Return::storage_type, offset>(x)};      \
    }                                                                                    \
    /* both halves of Return array are extracted from argument */                        \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() <           \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG1)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x);                 \
        using R0 = typename Return::storage_type0;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        constexpr int entries_offset_right = entries_offset + R0::Size;                  \
        return {                                                                         \
            simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x),    \
            simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
                x)};                                                                     \
    }                                                                                    \
    /* SIMD Vector/Mask to non-atomic SimdArray/simdmaskarray */                         \
    /* right half of Return array is zero */                                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() >=          \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG2)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x);                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        return {simd_cast_with_offset<R0, entries_offset>(x), R1(0)};                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2148 
// simd_cast<T>(xs...) from SimdArray/-mask {{{2
// simd_cast overloads converting one or more SimdArray/SimdMaskArray arguments
// to an arbitrary Return type. The cases distinguish whether the SimdArray is
// atomic ("indivisible", N == wrapped vector width), bisectable (N a power of
// two, split into storage_type0/1), or oddly sized, and whether trailing
// arguments can be dropped because Return is already full.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
    /* indivisible SimdArrayType_ */                                                     \
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value &&    \
                   (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) &&       \
                   !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value),            \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs)              \
    {                                                                                    \
        vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...);                          \
        return simd_cast<Return>(internal_data(x0), internal_data(xs)...);               \
    }                                                                                    \
    /* indivisible SimdArrayType_ && can drop arguments from the end */                  \
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value &&    \
                   (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) &&     \
                   !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value),            \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs)              \
    {                                                                                    \
        vc_debug_("simd_cast{indivisible2}(", ")\n", x0, xs...);                         \
        return simd_cast_without_last<Return,                                            \
                                      typename SimdArrayType_<T, N, V, N>::storage_type, \
                                      typename From::storage_type...>(                   \
            internal_data(x0), internal_data(xs)...);                                    \
    }                                                                                    \
    /* bisectable SimdArrayType_ (N = 2^n) && never too large */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         !std::is_same<Return, SimdArrayType_<T, N, V, M>>::value &&                     \
         is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value),  \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{bisectable}(", ")\n", x0, xs...);                           \
        return simd_cast_interleaved_argument_order<                                     \
            Return, typename SimdArrayType_<T, N, V, M>::storage_type0,                  \
            typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)...,  \
                                             internal_data1(x0), internal_data1(xs)...); \
    }                                                                                    \
    /* bisectable SimdArrayType_ (N = 2^n) && input so large that at least the last      \
     * input can be dropped */                                                           \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         !is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{bisectable2}(", ")\n", x0, xs...);                          \
        return simd_cast_without_last<Return, SimdArrayType_<T, N, V, M>, From...>(      \
            x0, xs...);                                                                  \
    }                                                                                    \
    /* remaining SimdArrayType_ input never larger (N != 2^n) */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         N * (1 + sizeof...(From)) <= Return::Size && !is_power_of_2<N>::value),         \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{remaining}(", ")\n", x0, xs...);                            \
        return simd_cast_impl_smaller_input<Return, N, SimdArrayType_<T, N, V, M>,       \
                                            From...>(x0, xs...);                         \
    }                                                                                    \
    /* remaining SimdArrayType_ input larger (N != 2^n) */                               \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         N * (1 + sizeof...(From)) > Return::Size && !is_power_of_2<N>::value),          \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{remaining2}(", ")\n", x0, xs...);                           \
        return simd_cast_impl_larger_input<Return, N, SimdArrayType_<T, N, V, M>,        \
                                           From...>(x0, xs...);                          \
    }                                                                                    \
    /* a single bisectable SimdArrayType_ (N = 2^n) too large */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && N >= 2 * Return::Size && is_power_of_2<N>::value), Return>  \
        simd_cast(const SimdArrayType_<T, N, V, M> &x)                                   \
    {                                                                                    \
        vc_debug_("simd_cast{single bisectable}(", ")\n", x);                            \
        return simd_cast<Return>(internal_data0(x));                                     \
    }                                                                                    \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
    Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size &&                       \
                                     N < 2 * Return::Size && is_power_of_2<N>::value),   \
                                    Return>                                              \
    simd_cast(const SimdArrayType_<T, N, V, M> &x)                                       \
    {                                                                                    \
        vc_debug_("simd_cast{single bisectable2}(", ")\n", x);                           \
        return simd_cast<Return>(internal_data0(x), internal_data1(x));                  \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
2257 template <class Return, class T, int N, class... Ts,
2258  class = enable_if<!std::is_same<Return, fixed_size_simd<T, N>>::value>>
2259 Vc_INTRINSIC Return simd_cast(const fixed_size_simd<T, N> &x, const Ts &... xs)
2260 {
2261  return simd_cast<Return>(static_cast<const SimdArray<T, N> &>(x),
2262  static_cast<const SimdArray<T, N> &>(xs)...);
2263 }
2264 template <class Return, class T, int N, class... Ts,
2265  class = enable_if<!std::is_same<Return, fixed_size_simd_mask<T, N>>::value>>
2266 Vc_INTRINSIC Return simd_cast(const fixed_size_simd_mask<T, N> &x, const Ts &... xs)
2267 {
2268  return simd_cast<Return>(static_cast<const SimdMaskArray<T, N> &>(x),
2269  static_cast<const SimdMaskArray<T, N> &>(xs)...);
2270 }
2271 
// simd_cast<T, offset>(SimdArray/-mask) {{{2
// simd_cast overloads extracting the chunk starting at entry
// `offset * Return::Size` of a SimdArray/SimdMaskArray. The chunk is taken
// from the left (storage_type0) or right (storage_type1) member where it fits;
// otherwise entries are copied one by one.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
    /* offset == 0 is like without offset */                                             \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG0)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x);                          \
        return simd_cast<Return>(x);                                                     \
    }                                                                                    \
    /* forward to V */                                                                   \
    template <typename Return, int offset, typename T, std::size_t N, typename V>        \
    Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, N> &x Vc_DUMMY_ARG1)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x);                      \
        return simd_cast<Return, offset>(internal_data(x));                              \
    }                                                                                    \
    /* convert from right member of SimdArray */                                         \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size == 0),           \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG2)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right}(", ")\n", offset, x);                        \
        return simd_cast<Return, offset - Common::left_size<N>() / Return::Size>(        \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* same as above except for odd cases where offset * Return::Size doesn't fit the    \
     * left side of the SimdArray */                                                     \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size != 0),           \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG3)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x);                 \
        return simd_cast_with_offset<Return,                                             \
                                     offset * Return::Size - Common::left_size<N>()>(    \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* convert from left member of SimdArray */                                          \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && /*offset * Return::Size < Common::left_size<N>() &&*/                 \
         offset != 0 && (offset + 1) * Return::Size <= Common::left_size<N>()),          \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG4)                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, left}(", ")\n", offset, x);                         \
        return simd_cast<Return, offset>(internal_data0(x));                             \
    }                                                                                    \
    /* fallback to copying scalars */                                                    \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && (offset * Return::Size < Common::left_size<N>()) &&         \
                   offset != 0 && (offset + 1) * Return::Size > Common::left_size<N>()), \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG5)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x);                 \
        using R = typename Return::EntryType;                                            \
        Return r = Return(0);                                                            \
        for (std::size_t i = offset * Return::Size;                                      \
             i < std::min(N, (offset + 1) * Return::Size); ++i) {                        \
            r[i - offset * Return::Size] = static_cast<R>(x[i]);                         \
        }                                                                                \
        return r;                                                                        \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
// simd_cast_drop_arguments (definitions) {{{2
// Terminal case: a single argument is converted directly.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x)
{
    return simd_cast<Return>(x);
}
// All arguments still fit into Return: forward everything to simd_cast.
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
    simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
{
    return simd_cast<Return>(xs..., x);
}
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
// The unnamed last argument cannot contribute to Return anymore: drop it and recurse.
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From)
{
    return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
}
// Exactly two arguments and the second is superfluous: drop it.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From)
{
    return simd_cast_drop_arguments<Return>(x);
}
2387 
// simd_cast_with_offset (definitions) {{{2
// offset is a non-zero multiple of Return::Size: forward to the
// offset-taking simd_cast overloads above.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0),
              Return> simd_cast_with_offset(const From &x)
{
    return simd_cast<Return, offset / Return::Size>(x);
}
2396 template <typename Return, std::size_t offset, typename From>
2397 Vc_INTRINSIC Vc_CONST
2398  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
2400  !Traits::isAtomicSimdArray<Return>::value) ||
2402  !Traits::isAtomicSimdMaskArray<Return>::value))),
2403  Return>
2404  simd_cast_with_offset(const From &x)
2405 {
2406  using R0 = typename Return::storage_type0;
2407  using R1 = typename Return::storage_type1;
2408  return {simd_cast_with_offset<R0, offset>(x),
2409  simd_cast_with_offset<R1, offset + R0::Size>(x)};
2410 }
2411 template <typename Return, std::size_t offset, typename From>
2412 Vc_INTRINSIC Vc_CONST
2413  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
2415  Traits::isAtomicSimdArray<Return>::value) ||
2417  Traits::isAtomicSimdMaskArray<Return>::value))),
2418  Return>
2419  simd_cast_with_offset(const From &x)
2420 {
2421  return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
2422 }
// offset == 0: degenerates to a plain simd_cast of all arguments.
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
    simd_cast_with_offset(const From &x, const Froms &... xs)
{
    return simd_cast<Return>(x, xs...);
}
2430 
// simd_cast_without_last (definition) {{{2
// Casts all arguments except the trailing one. Callers explicitly specify the
// template arguments so that the pack From binds to all but the last argument,
// leaving the final (unnamed) parameter to absorb the argument to drop.
template <typename Return, typename T, typename... From>
Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(const From &... xs, const T &)
{
    return simd_cast<Return>(xs...);
}
2437 
// simd_cast_interleaved_argument_order (definitions) {{{2

#ifdef Vc_MSVC
// MSVC doesn't see that the Ts pack below can be empty and thus complains when extract_interleaved
// is called with only 2 arguments. These overloads here are *INCORRECT standard C++*, but they make
// MSVC do the right thing.
// Two-argument base cases: select the first (I == 0) or second (I == 1) value.
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0, const T0 &)
{
    return a0;
}
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &, const T0 &b0)
{
    return b0;
}
#endif  // Vc_MSVC
2455 
/// \internal Returns the I-th element of the interleaved sequence
/// (a0, b0, a1, b1, ...) formed from the two argument halves. I == 0 selects
/// the head of the first half.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0,
                                                                  const Ts &...,
                                                                  const T0 &,
                                                                  const Ts &...)
{
    return a0;
}
/// \internal I == 1 selects the head of the second half.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &,
                                                                  const Ts &...,
                                                                  const T0 &b0,
                                                                  const Ts &...)
{
    return b0;
}
/// \internal I > 1: drop the head of each half and recurse with I - 2.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(const T0 &,
                                                                 const Ts &... a,
                                                                 const T0 &,
                                                                 const Ts &... b)
{
    return extract_interleaved<I - 2, Ts...>(a..., b...);
}
/// \internal Calls simd_cast with the arguments of (a..., b...) reordered into
/// interleaved order (a0, b0, a1, b1, ...) as directed by the Indexes pack.
template <typename Return, typename... Ts, std::size_t... Indexes>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>, const Ts &... a,
                                           const Ts &... b)
{
    return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
}
/// \internal Entry point: builds the index_sequence covering both halves and
/// dispatches to the worker above.
template <typename Return, typename... Ts>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b)
{
    using seq = make_index_sequence<sizeof...(Ts)*2>;
    return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...);
}
2500 
// conditional_assign {{{1
// Generates the conditional_assign overloads for the compound-assignment
// operators: `lhs(mask) op_ rhs` applies op_ only to entries where mask is set.
#define Vc_CONDITIONAL_ASSIGN(name_, op_)                                                \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M,  \
              typename U>                                                                \
    Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign(               \
        SimdArray<T, N, V, VN> &lhs, M &&mask, U &&rhs)                                  \
    {                                                                                    \
        lhs(mask) op_ rhs;                                                               \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(          Assign,  =);
Vc_CONDITIONAL_ASSIGN(      PlusAssign, +=);
Vc_CONDITIONAL_ASSIGN(     MinusAssign, -=);
Vc_CONDITIONAL_ASSIGN(  MultiplyAssign, *=);
Vc_CONDITIONAL_ASSIGN(    DivideAssign, /=);
Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
Vc_CONDITIONAL_ASSIGN(       XorAssign, ^=);
Vc_CONDITIONAL_ASSIGN(       AndAssign, &=);
Vc_CONDITIONAL_ASSIGN(        OrAssign, |=);
Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
#undef Vc_CONDITIONAL_ASSIGN
2523 
// Generates the conditional_assign overloads for increment/decrement: the
// operation is applied only where mask is set and the result is returned.
#define Vc_CONDITIONAL_ASSIGN(name_, expr_)                                              \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M>  \
    Vc_INTRINSIC enable_if<O == Operator::name_, SimdArray<T, N, V, VN>>                 \
    conditional_assign(SimdArray<T, N, V, VN> &lhs, M &&mask)                            \
    {                                                                                    \
        return expr_;                                                                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
#undef Vc_CONDITIONAL_ASSIGN
2537 // transpose_impl {{{1
2538 namespace Common
2539 {
2540 template <typename T, size_t N, typename V>
2541 inline void transpose_impl(
2542  TransposeTag<4, 4>, SimdArray<T, N, V, N> *Vc_RESTRICT r[],
2543  const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
2545 {
2546  V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
2547  &internal_data(*r[2]), &internal_data(*r[3])};
2548  transpose_impl(TransposeTag<4, 4>(), &r2[0],
2549  TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
2550  internal_data(std::get<1>(proxy.in)),
2551  internal_data(std::get<2>(proxy.in)),
2552  internal_data(std::get<3>(proxy.in))});
2553 }
2554 
2555 template <typename T, typename V>
2556 inline void transpose_impl(
2557  TransposeTag<2, 4>, SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
2558  const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
2560 {
2561  auto &lo = *r[0];
2562  auto &hi = *r[1];
2563  internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
2564  internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
2565  internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
2566  internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
2567  internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
2568  internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
2569  internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
2570  internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
2571 }
2572 
2573 template <typename T, typename V>
2574 inline void transpose_impl(
2575  TransposeTag<4, 4>, SimdArray<T, 1, V, 1> *Vc_RESTRICT r[],
2576  const TransposeProxy<SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>,
2578 {
2579  V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
2580  &internal_data(*r[2]), &internal_data(*r[3])};
2581  transpose_impl(TransposeTag<4, 4>(), &r2[0],
2582  TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
2583  internal_data(std::get<1>(proxy.in)),
2584  internal_data(std::get<2>(proxy.in)),
2585  internal_data(std::get<3>(proxy.in))});
2586 }
2587 
2588 template <typename T, size_t N, typename V>
2589 inline void transpose_impl(
2590  TransposeTag<4, 4>, SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
2591  const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
2593 {
2594  SimdArray<T, N, V, 1> *Vc_RESTRICT r0[4 / 2] = {r[0], r[1]};
2595  SimdArray<T, N, V, 1> *Vc_RESTRICT r1[4 / 2] = {r[2], r[3]};
2596  using H = SimdArray<T, 2>;
2597  transpose_impl(TransposeTag<2, 4>(), &r0[0],
2598  TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
2599  internal_data0(std::get<1>(proxy.in)),
2600  internal_data0(std::get<2>(proxy.in)),
2601  internal_data0(std::get<3>(proxy.in))});
2602  transpose_impl(TransposeTag<2, 4>(), &r1[0],
2603  TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
2604  internal_data1(std::get<1>(proxy.in)),
2605  internal_data1(std::get<2>(proxy.in)),
2606  internal_data1(std::get<3>(proxy.in))});
2607 }
2608 
2609 /* TODO:
2610 template <typename T, std::size_t N, typename V, std::size_t VSize>
2611 inline enable_if<(N > VSize), void> transpose_impl(
2612  std::array<SimdArray<T, N, V, VSize> * Vc_RESTRICT, 4> & r,
2613  const TransposeProxy<SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>,
2614  SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>> &proxy)
2615 {
2616  typedef SimdArray<T, N, V, VSize> SA;
2617  std::array<typename SA::storage_type0 * Vc_RESTRICT, 4> r0 = {
2618  {&internal_data0(*r[0]), &internal_data0(*r[1]), &internal_data0(*r[2]),
2619  &internal_data0(*r[3])}};
2620  transpose_impl(
2621  r0, TransposeProxy<typename SA::storage_type0, typename SA::storage_type0,
2622  typename SA::storage_type0, typename SA::storage_type0>{
2623  internal_data0(std::get<0>(proxy.in)),
2624  internal_data0(std::get<1>(proxy.in)),
2625  internal_data0(std::get<2>(proxy.in)),
2626  internal_data0(std::get<3>(proxy.in))});
2627 
2628  std::array<typename SA::storage_type1 * Vc_RESTRICT, 4> r1 = {
2629  {&internal_data1(*r[0]), &internal_data1(*r[1]), &internal_data1(*r[2]),
2630  &internal_data1(*r[3])}};
2631  transpose_impl(
2632  r1, TransposeProxy<typename SA::storage_type1, typename SA::storage_type1,
2633  typename SA::storage_type1, typename SA::storage_type1>{
2634  internal_data1(std::get<0>(proxy.in)),
2635  internal_data1(std::get<1>(proxy.in)),
2636  internal_data1(std::get<2>(proxy.in)),
2637  internal_data1(std::get<3>(proxy.in))});
2638 }
2639 */
2640 } // namespace Common
2641 
2642 // }}}1
2643 namespace Detail
2644 {
// InterleaveImpl for SimdArrays {{{
// atomic {{{1
// Specialization for atomic SimdArray (wraps exactly one native vector V):
// (de)interleaving simply forwards to V's InterleaveImpl after unwrapping.
template <class T, size_t N, class V, size_t VSizeof>
struct InterleaveImpl<SimdArray<T, N, V, N>, N, VSizeof> {
    // Write the entries of vv... interleaved into memory at positions data + i[k].
    template <class I, class... VV>
    static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv)
    {
        InterleaveImpl<V, N, VSizeof>::interleave(data, i, internal_data(vv)...);
    }
    // Read interleaved memory at positions data + i[k] back into vv...
    template <class I, class... VV>
    static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... vv)
    {
        InterleaveImpl<V, N, VSizeof>::deinterleave(data, i, internal_data(vv)...);
    }
};
2660 
2661 // generic (TODO) {{{1
2662 /*
2663 template <class T, size_t N, class V, size_t Wt, size_t VSizeof>
2664 struct InterleaveImpl<SimdArray<T, N, V, Wt>, N, VSizeof> {
2665  using SA = SimdArray<T, N, V, Wt>;
2666  using SA0 = typename SA::storage_type0;
2667  using SA1 = typename SA::storage_type1;
2668 
2669  template <class I, class... VV>
2670  static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv)
2671  {
2672  InterleaveImpl<SA0, SA0::size(), sizeof(SA0)>::interleave(
2673  data, i, // i needs to be split
2674  internal_data0(vv)...);
2675  InterleaveImpl<SA1, SA1::size(), sizeof(SA1)>::interleave(
2676  data, // how far to advance data?
2677  i, // i needs to be split
2678  internal_data1(vv)...);
2679  }
2680  template <class I, class... VV>
2681  static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... vv)
2682  {
2683  InterleaveImpl<V, N, VSizeof>::deinterleave(data, i, internal_data(vv)...);
2684  }
2685 };
2686 */
2687 } // namespace Detail
2688 // }}}
2690 
2691 } // namespace Vc_VERSIONED_NAMESPACE
2692 
// numeric_limits {{{1
namespace std
{
// Specialization of std::numeric_limits for SimdArray: inherits the scalar
// traits of T; each query function returns the scalar limit of T implicitly
// broadcast to a full SimdArray (R's converting constructor does the splat).
template <typename T, size_t N, typename V, size_t VN>
struct numeric_limits<Vc::SimdArray<T, N, V, VN>> : public numeric_limits<T> {
private:
    // Shorthand for the SimdArray type the scalar limits are broadcast into.
    using R = Vc::SimdArray<T, N, V, VN>;

public:
    static Vc_ALWAYS_INLINE Vc_CONST R max() noexcept { return numeric_limits<T>::max(); }
    static Vc_ALWAYS_INLINE Vc_CONST R min() noexcept { return numeric_limits<T>::min(); }
    static Vc_ALWAYS_INLINE Vc_CONST R lowest() noexcept
    {
        return numeric_limits<T>::lowest();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R epsilon() noexcept
    {
        return numeric_limits<T>::epsilon();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R round_error() noexcept
    {
        return numeric_limits<T>::round_error();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R infinity() noexcept
    {
        return numeric_limits<T>::infinity();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R quiet_NaN() noexcept
    {
        return numeric_limits<T>::quiet_NaN();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R signaling_NaN() noexcept
    {
        return numeric_limits<T>::signaling_NaN();
    }
    static Vc_ALWAYS_INLINE Vc_CONST R denorm_min() noexcept
    {
        return numeric_limits<T>::denorm_min();
    }
};
}  // namespace std
2734 //}}}1
2735 
2736 #endif // VC_COMMON_SIMDARRAY_H_
2737 
2738 // vim: foldmethod=marker
value_type operator[](size_t index) const noexcept
This operator can be used to read scalar entries of the vector.
Definition: simdarray.h:1007
The main vector class for expressing data parallelism.
Definition: fwddecl.h:53
constexpr VectorSpecialInitializerIndexesFromZero IndexesFromZero
The special object Vc::IndexesFromZero can be used to construct Vector objects initialized to values ...
Definition: types.h:90
fixed_size_simd< T, N > max(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std:: max function component-wise and concurrently.
Definition: simdarray.h:1815
Vc::Vector< T > min(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
Vector apply(F &&f) const
Call f on every entry of the vector and return the results as a new vector.
std::ostream & operator<<(std::ostream &out, const Vc::Vector< T, Abi > &v)
Prints the contents of a vector into a stream object.
Definition: IO:117
UnalignedTag DefaultLoadTag
The default load tag type uses unaligned (non-streaming) loads.
static fixed_size_simd< T, N > Zero()
Returns a vector with the entries initialized to zero.
Definition: simdarray.h:684
result_vector_type< L, R > operator-(L &&lhs, R &&rhs)
Applies - component-wise and concurrently.
Definition: simdarray.h:1682
fixed_size_simd< T, N > copysign(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std:: copysign function component-wise and concurrently.
Definition: simdarray.h:1771
Vc::Vector< T > max(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
Definition: vector.h:249
static fixed_size_simd< T, N > IndexesFromZero()
Returns a vector with the entries initialized to 0, 1, 2, 3, 4, 5, ...
Definition: simdarray.h:696
Identifies any possible SimdArray<T, N> type (independent of const/volatile or reference) ...
Definition: type_traits.h:145
fixed_size_simd< T, N > atan2(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::atan2 function component-wise and concurrently.
Definition: simdarray.h:1769
fixed_size_simd< T, N > min(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::min function component-wise and concurrently.
Definition: simdarray.h:1814
Identifies any possible SimdMaskArray<T, N> type (independent of const/volatile or reference) ...
Definition: type_traits.h:151
Data-parallel arithmetic type with user-defined number of elements.
Definition: fwddecl.h:82
The value member will either be the number of SIMD vector entries or 0 if T is not a SIMD type...
Definition: type_traits.h:174
fixed_size_simd< T, N > rotated(int amount) const
Rotate vector entries to the left by amount.
Definition: simdarray.h:1221
Vector reversed() const
Returns a vector with all components reversed.
fixed_size_simd< T, N > reversed() const
Returns a vector with all components reversed.
Definition: simdarray.h:1304
Data-parallel mask type with user-defined number of boolean elements.
Definition: fwddecl.h:86
Vector sorted() const
Return a sorted copy of the vector.
Vector rotated(int amount) const
Rotate vector entries to the left by amount.
Vector shifted(int amount) const
Shift vector entries to the left by amount; shifting in zeros.
void assign(SimdizeDetail::Adapter< S, T, N > &a, size_t i, const S &x)
Assigns one scalar object x to a SIMD slot at offset i in the simdized object a.
Definition: simdize.h:1223
fixed_size_simd< T, N > sorted() const
Return a sorted copy of the vector.
Definition: simdarray.h:1326
fixed_size_simd_mask< T, N > isnegative(const SimdArray< T, N, V, M > &x)
Applies the std::isnegative function component-wise and concurrently.
Definition: simdarray.h:1786
Identifies any SIMD vector type (independent of implementation or whether it's SimdArray<T, N>).
Definition: type_traits.h:136
Common::WriteMaskedVector< SimdArray, mask_type > operator()(const mask_type &mask)
Writemask the vector before an assignment.
Definition: simdarray.h:1015
static fixed_size_simd< T, N > Random()
Returns a vector with pseudo-random entries.
Definition: simdarray.h:702
Vector partialSum() const
Returns a vector containing the sum of all entries with smaller index.
result_vector_type< L, R > operator+(L &&lhs, R &&rhs)
Applies + component-wise and concurrently.
Definition: simdarray.h:1682
static fixed_size_simd< T, N > One()
Returns a vector with the entries initialized to one.
Definition: simdarray.h:690
fixed_size_simd< T, N > apply(F &&f) const
Call f on every entry of the vector and return the results as a new vector.
Definition: simdarray.h:1079
fixed_size_simd< T, N > apply(F &&f, const mask_type &k) const
As above, but skip the entries where mask is not set.
Definition: simdarray.h:1085
static constexpr std::size_t size()
Returns N, the number of scalar components in an object of this type.
Definition: simdarray.h:658
value_type EntryType
The type of the elements (i.e. T)
Definition: simdarray.h:667
void deinterleave(V *a, V *b, const M *memory, A align)
Definition: deinterleave.h:76
constexpr AlignedTag Aligned
Use this object for a flags parameter to request aligned loads and stores.
void gather(const MT *mem, const IT &indexes)
Gather function.
Definition: simdarray.h:214
SimdArray(value_type a)
Broadcast Constructor.
Definition: simdarray.h:731
The main SIMD mask class.
Definition: fwddecl.h:52
void load(const EntryType *mem)
Load the vector entries from mem, overwriting the previous values.
Definition: vector.h:73
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/false...
Definition: types.h:80
Adapter< S, T, N > shifted(const Adapter< S, T, N > &a, int shift)
Returns a new vectorized object where each entry is shifted by shift.
Definition: simdize.h:1071
T value_type
The type of the elements (i.e. T)
Definition: simdarray.h:640
SimdArray< T, N > frexp(const SimdArray< T, N > &x, SimdArray< int, N > *e)
Applies the std::frexp function component-wise and concurrently.
Definition: simdarray.h:1789
Vector Classes Namespace.
Definition: dox.h:584
constexpr VectorSpecialInitializerOne One
The special object Vc::One can be used to construct Vector and Mask objects initialized to one/true...
Definition: types.h:85
void scatter(MT *mem, IT &&indexes) const
Scatter function.
Definition: simdarray.h:99
std::pair< V, V > interleave(const V &a, const V &b)
Interleaves the entries from a and b into two vectors of the same type.
Definition: interleave.h:55
constexpr std::size_t MemoryAlignment
Specifies the most conservative memory alignment necessary for aligned loads and stores of Vector types...
Definition: vector.h:215
reference operator[](size_t i) noexcept
This operator can be used to modify scalar entries of the vector.
Definition: simdarray.h:1000
fixed_size_simd< T, N > shifted(int amount) const
Shift vector entries to the left by amount; shifting in zeros.
Definition: simdarray.h:1092
void sincos(const SimdArray< T, N > &x, SimdArray< T, N > *sin, SimdArray< T, N > *cos)
Determines sine and cosine concurrently and component-wise on x.
Definition: simdarray.h:1808
SimdArray< T, N > fma(const SimdArray< T, N > &a, const SimdArray< T, N > &b, const SimdArray< T, N > &c)
Applies the std::fma function component-wise and concurrently.
Definition: simdarray.h:1778
SimdArray< T, N > ldexp(const SimdArray< T, N > &x, const SimdArray< int, N > &e)
Applies the std::ldexp function component-wise and concurrently.
Definition: simdarray.h:1795
fixed_size_simd< T, N > exponent(const SimdArray< T, N, V, M > &x)
Applies the std::exponent function component-wise and concurrently.
Definition: simdarray.h:1774
static fixed_size_simd< T, N > generate(const G &gen)
Generate a vector object from return values of gen (static variant of fill).
Definition: simdarray.h:708
fixed_size_simd< T, N > operator+() const
Returns a copy of itself.
Definition: simdarray.h:925
constexpr UnalignedTag Unaligned
Use this object for a flags parameter to request unaligned loads and stores.