Vc  1.4.1
SIMD Vector Classes for C++
simdarray.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 #ifndef VC_COMMON_SIMDARRAY_H_
29 #define VC_COMMON_SIMDARRAY_H_
30 
31 //#define Vc_DEBUG_SIMD_CAST 1
32 //#define Vc_DEBUG_SORTED 1
33 //#include "../IO"
34 
35 #include <array>
36 
37 #include "writemaskedvector.h"
38 #include "simdarrayhelper.h"
39 #include "simdmaskarray.h"
40 #include "utility.h"
41 #include "interleave.h"
42 #include "indexsequence.h"
43 #include "transpose.h"
44 #include "macros.h"
45 
46 namespace Vc_VERSIONED_NAMESPACE
47 {
48 // select_best_vector_type {{{
49 namespace Common
50 {
53 
// Compile-time walk over an ordered list of candidate vector types; `type` names the
// first candidate that is acceptable for N entries.
template <std::size_t N, class... Candidates> struct select_best_vector_type_impl;

// End of the list: a lone remaining candidate is accepted unconditionally.
template <std::size_t N, class Last> struct select_best_vector_type_impl<N, Last> {
    typedef Last type;
};

// General step: keep Head when it holds no more than N entries (N >= Head::Size);
// otherwise take whatever the remaining candidates resolve to.
template <std::size_t N, class Head, class... Tail>
struct select_best_vector_type_impl<N, Head, Tail...> {
    using type = typename std::conditional<
        (N >= Head::Size), Head,
        typename select_best_vector_type_impl<N, Tail...>::type>::type;
};
// Selects the vector type for a given element type T and entry count N by handing a
// candidate list to select_best_vector_type_impl. The list is assembled by the
// preprocessor: AVX2::Vector<T> or else AVX::Vector<T> (whichever Vc_IMPL_* macro is
// defined), then SSE::Vector<T> if Vc_IMPL_SSE is defined, and always
// Scalar::Vector<T> as the final candidate.
69 template <class T, std::size_t N>
70 struct select_best_vector_type : select_best_vector_type_impl<N,
71 #ifdef Vc_IMPL_AVX2
72  Vc::AVX2::Vector<T>,
73 #elif defined Vc_IMPL_AVX
74  Vc::AVX::Vector<T>,
75 #endif
76 #ifdef Vc_IMPL_SSE
77  Vc::SSE::Vector<T>,
78 #endif
79  Vc::Scalar::Vector<T>> {
80 };
82 } // namespace Common
83 // }}}
84 // internal namespace (product & sum helper) {{{1
85 namespace internal
86 {
// Named wrappers around operator* and operator+. They are passed as the combining
// function to the reduction macro of the generic SimdArray (product and sum).
87 template <typename T> T Vc_INTRINSIC Vc_PURE product_helper_(const T &l, const T &r) { return l * r; }
88 template <typename T> T Vc_INTRINSIC Vc_PURE sum_helper_(const T &l, const T &r) { return l + r; }
89 } // namespace internal
90 
91 // min & max declarations {{{1
// Forward declarations of the two-argument min/max overloads for SimdArray. They are
// needed before the class definitions because the reduction members of the generic
// SimdArray call Vc::min / Vc::max on their two storage halves.
// NOTE(review): the declarator lines (93 and 96) were missing from this listing; the
// function names follow the section heading, the first parameter mirrors `y`, and the
// return type follows the fixed_size_simd<T, N> convention used throughout this file.
// Confirm against the definitions further down.
92 template <typename T, std::size_t N, typename V, std::size_t M>
93 inline fixed_size_simd<T, N> min(const SimdArray<T, N, V, M> &x,
94  const SimdArray<T, N, V, M> &y);
95 template <typename T, std::size_t N, typename V, std::size_t M>
96 inline fixed_size_simd<T, N> max(const SimdArray<T, N, V, M> &x,
97  const SimdArray<T, N, V, M> &y);
98 
99 // SimdArray class {{{1
102 
103 // atomic SimdArray {{{1
104 #define Vc_CURRENT_CLASS_NAME SimdArray
105 
114 template <typename T, std::size_t N, typename VectorType_>
115 class SimdArray<T, N, VectorType_, N>
116 {
117  static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
118  std::is_same<T, int32_t>::value ||
119  std::is_same<T, uint32_t>::value ||
120  std::is_same<T, int16_t>::value ||
121  std::is_same<T, uint16_t>::value,
122  "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, "
123  "int16_t, uint16_t }");
124  static_assert(
125  std::is_same<VectorType_,
126  typename Common::select_best_vector_type<T, N>::type>::value &&
127  VectorType_::size() == N,
128  "ERROR: leave the third and fourth template parameters with their defaults. They "
129  "are implementation details.");
130 
131 public:
132  static constexpr bool is_atomic = true;
133  using VectorType = VectorType_;
134  using vector_type = VectorType;
135  using storage_type = vector_type;
136  using vectorentry_type = typename vector_type::VectorEntryType;
137  using value_type = T;
140  static constexpr std::size_t size() { return N; }
141  using Mask = mask_type;
142  using MaskType = Mask;
143  using MaskArgument = const MaskType &;
144  using VectorEntryType = vectorentry_type;
146  using IndexType = index_type;
147  using AsArg = const SimdArray &;
148  using reference = Detail::ElementReference<SimdArray>;
149  static constexpr std::size_t Size = size();
150  static constexpr std::size_t MemoryAlignment = storage_type::MemoryAlignment;
151 
152  // zero init
153  Vc_INTRINSIC SimdArray() = default;
154 
155  // default copy ctor/operator
156  Vc_INTRINSIC SimdArray(const SimdArray &) = default;
157  Vc_INTRINSIC SimdArray(SimdArray &&) = default;
158  Vc_INTRINSIC SimdArray &operator=(const SimdArray &) = default;
159 
160  // broadcast
// Broadcast constructors: initialize the wrapped vector from a single scalar. Separate
// overloads exist for const lvalue, non-const lvalue and rvalue arguments.
161  Vc_INTRINSIC SimdArray(const value_type &a) : data(a) {}
162  Vc_INTRINSIC SimdArray(value_type &a) : data(a) {}
163  Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {}
// Additional broadcast overload that accepts exactly `int` when value_type is not int.
// The argument is cast to value_type and forwarded to the constructors above.
164  template <
165  typename U,
166  typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
167  Vc_INTRINSIC SimdArray(U a)
168  : SimdArray(static_cast<value_type>(a))
169  {
170  }
171 
172  // implicit casts
// Converting constructors from a SimdArray<U, N, V> with the same N but another
// vector type V. Which overload participates depends on how N relates to V::Size; the
// trailing defaulted `class = U` parameters only make the three template signatures
// distinct from each other.
// Case N == V::Size: internal_data(x) is simd_cast to vector_type directly.
173  template <class U, class V, class = enable_if<N == V::Size>>
174  Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
175  : data(simd_cast<vector_type>(internal_data(x)))
176  {
177  }
// Case V::Size < N <= 2 * V::Size: the two halves of x (internal_data0 / internal_data1)
// are unwrapped and combined by a two-argument simd_cast.
178  template <class U, class V, class = enable_if<(N > V::Size && N <= 2 * V::Size)>,
179  class = U>
180  Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
181  : data(simd_cast<vector_type>(internal_data(internal_data0(x)),
182  internal_data(internal_data1(x))))
183  {
184  }
// Case 2 * V::Size < N <= 4 * V::Size: x is split twice and the four quarters are
// combined, in order, by a four-argument simd_cast.
185  template <class U, class V, class = enable_if<(N > 2 * V::Size && N <= 4 * V::Size)>,
186  class = U, class = U>
187  Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
188  : data(simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))),
189  internal_data(internal_data1(internal_data0(x))),
190  internal_data(internal_data0(internal_data1(x))),
191  internal_data(internal_data1(internal_data1(x)))))
192  {
193  }
194 
// Construct from a Common::Segment: the segment's underlying data is simd_cast to
// vector_type, with the segment Index passed as the second simd_cast template argument.
195  template <typename V, std::size_t Pieces, std::size_t Index>
196  Vc_INTRINSIC SimdArray(Common::Segment<V, Pieces, Index> &&x)
197  : data(simd_cast<vector_type, Index>(x.data))
198  {
199  }
200
// Construct from an initializer list via an unaligned load from init.begin().
// NOTE(review): the size check is a Vc_ASSERT that runs after the load has already
// read from the list; a list shorter than size() is therefore read past its end.
201  Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
202  : data(init.begin(), Vc::Unaligned)
203  {
204  Vc_ASSERT(init.size() == size());
205  }
206 
207  // implicit conversion from underlying vector_type
// Enabled for types that are SIMD vectors but not SimdArrays; the argument is
// simd_cast to vector_type.
208  template <
209  typename V,
210  typename = enable_if<Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value>>
211  Vc_INTRINSIC SimdArray(const V &x)
212  : data(simd_cast<vector_type>(x))
213  {
214  }
215
216  // implicit conversion to Vector<U, AnyAbi> if Vector<U, AnyAbi>::size() == N and
217  // T is implicitly convertible to U (the fixed_size ABI is excluded here)
218  template <typename U, typename A,
219  typename =
220  enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
221  !std::is_same<A, simd_abi::fixed_size<N>>::value>>
222  Vc_INTRINSIC operator Vector<U, A>() const
223  {
224  return simd_cast<Vector<U, A>>(data);
225  }
// Conversions to fixed_size_simd<T, N>, implemented as a static_cast of *this to a
// reference to that type (mutable and const variants).
226  operator fixed_size_simd<T, N> &()
227  {
228  return static_cast<fixed_size_simd<T, N> &>(*this);
229  }
230  operator const fixed_size_simd<T, N> &() const
231  {
232  return static_cast<const fixed_size_simd<T, N> &>(*this);
233  }
234 
235 #include "gatherinterface.h"
236 #include "scatterinterface.h"
237 
// Tag constructors. The Zero tag default-constructs the wrapped vector; the One and
// IndexesFromZero tags are forwarded to the wrapped vector's constructor.
238  explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data() {}
239  explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data(o) {}
240  explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i) : data(i)
241  {
242  }
// IndexesFromZero shifted by a compile-time Offset: start from the plain index
// sequence and add Offset (converted to value_type) to the wrapped vector.
243  template <std::size_t Offset>
244  explicit Vc_INTRINSIC SimdArray(
245  Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
246  : data(Vc::IndexesFromZero)
247  {
248  data += value_type(Offset);
249  }
250 
// Thin forwarders to the same-named members of the wrapped vector. The masked
// variants pass the mask's underlying representation (internal_data(k)).
251  Vc_INTRINSIC void setZero() { data.setZero(); }
252  Vc_INTRINSIC void setZero(mask_type k) { data.setZero(internal_data(k)); }
253  Vc_INTRINSIC void setZeroInverted() { data.setZeroInverted(); }
254  Vc_INTRINSIC void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); }
255
256  Vc_INTRINSIC void setQnan() { data.setQnan(); }
257  Vc_INTRINSIC void setQnan(mask_type m) { data.setQnan(internal_data(m)); }
258 
259  // internal: execute specified Operation
// Internal helper: runs the operation `op` with the wrapped vector of a fresh result
// object as destination and the (unpacked) arguments as inputs, then returns that
// result. Common::unpackArgumentsAuto performs the argument unpacking and the call.
// The declaration of `r` (line 263) was missing from this listing, so `r` was used
// without being declared; it is restored with the function's return type.
260  template <typename Op, typename... Args>
261  static Vc_INTRINSIC fixed_size_simd<T, N> fromOperation(Op op, Args &&... args)
262  {
263  fixed_size_simd<T, N> r;
264  Common::unpackArgumentsAuto(op, r.data, std::forward<Args>(args)...);
265  return r;
266  }
267 
// Like fromOperation, but for operations that produce no SimdArray result: nullptr is
// passed in place of the destination.
268  template <typename Op, typename... Args>
269  static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
270  {
271  Common::unpackArgumentsAuto(op, nullptr, std::forward<Args>(args)...);
272  }
273
// Named factories. Zero/One/IndexesFromZero construct through the matching tag
// constructor; Random goes through fromOperation with Common::Operations::random.
274  static Vc_INTRINSIC fixed_size_simd<T, N> Zero()
275  {
276  return SimdArray(Vc::Zero);
277  }
278  static Vc_INTRINSIC fixed_size_simd<T, N> One()
279  {
280  return SimdArray(Vc::One);
281  }
282  static Vc_INTRINSIC fixed_size_simd<T, N> IndexesFromZero()
283  {
284  return SimdArray(Vc::IndexesFromZero);
285  }
286  static Vc_INTRINSIC fixed_size_simd<T, N> Random()
287  {
288  return fromOperation(Common::Operations::random());
289  }
290 
291  // load ctor
// Load constructor: enabled for pointers to arithmetic types together with a valid
// load/store flag type; pointer and flags are handed to the wrapped vector.
292  template <class U, class Flags = DefaultLoadTag,
293  class = enable_if<std::is_arithmetic<U>::value &&
294  Traits::is_load_store_flag<Flags>::value>>
295  explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = Flags()) : data(mem, f)
296  {
297  }
298
// load / store perfectly forward all arguments to the wrapped vector's load / store.
299  template <typename... Args> Vc_INTRINSIC void load(Args &&... args)
300  {
301  data.load(std::forward<Args>(args)...);
302  }
303
304  template <typename... Args> Vc_INTRINSIC void store(Args &&... args) const
305  {
306  data.store(std::forward<Args>(args)...);
307  }
308 
// Unary operators. Each applies the operator to the wrapped vector and wraps the
// result through the private_init constructor; operator! yields a mask_type and
// unary plus returns a copy of *this.
309  Vc_INTRINSIC mask_type operator!() const
310  {
311  return {private_init, !data};
312  }
313
314  Vc_INTRINSIC fixed_size_simd<T, N> operator-() const
315  {
316  return {private_init, -data};
317  }
318
320  Vc_INTRINSIC fixed_size_simd<T, N> operator+() const { return *this; }
321
322  Vc_INTRINSIC fixed_size_simd<T, N> operator~() const
323  {
324  return {private_init, ~data};
325  }
326 
// Shifts by a scalar amount. All four overloads are enabled only when both the
// element type T and the shift-count type U are integral. The non-assigning forms
// return a new object; the compound forms modify the wrapped vector in place.
327  template <typename U,
328  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
329  Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator<<(U x) const
330  {
331  return {private_init, data << x};
332  }
333  template <typename U,
334  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
335  Vc_INTRINSIC fixed_size_simd<T, N> &operator<<=(U x)
336  {
337  data <<= x;
338  return *this;
339  }
340  template <typename U,
341  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
342  Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator>>(U x) const
343  {
344  return {private_init, data >> x};
345  }
346  template <typename U,
347  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
348  Vc_INTRINSIC fixed_size_simd<T, N> &operator>>=(U x)
349  {
350  data >>= x;
351  return *this;
352  }
353 
// Generates the compound assignment operators (`op=`) taking another SimdArray: each
// applies `op=` to the wrapped vectors and returns *this. The macro is expanded once
// per operator listed by Vc_ALL_ARITHMETICS, Vc_ALL_BINARY and Vc_ALL_SHIFTS and is
// undefined again right afterwards.
354 #define Vc_BINARY_OPERATOR_(op) \
355  Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs) \
356  { \
357  data op## = rhs.data; \
358  return *this; \
359  }
360  Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
361  Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
362  Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
363 #undef Vc_BINARY_OPERATOR_
364 
// Deprecated member form; forwards to isnegative() on the wrapped vector and wraps
// the resulting mask.
366  Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
367  {
368  return {private_init, isnegative(data)};
369  }
370 
371 private:
372  friend reference;
// Element accessors used by the `reference` proxy type (declared a friend just above).
// get reads entry i of the wrapped vector.
373  Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
374  {
375  return o.data[i];
376  }
// set writes v to entry i; it is noexcept exactly when assigning v to a value_type
// lvalue is noexcept.
377  template <typename U>
378  Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
379  noexcept(std::declval<value_type &>() = v))
380  {
381  o.data[i] = v;
382  }
383 
384 public:
// Mutable subscript: returns a `reference` proxy bound to (*this, i). The
// static_assert verifies that constructing the proxy is itself noexcept, which the
// noexcept specification of this operator relies on.
391  Vc_INTRINSIC reference operator[](size_t i) noexcept
392  {
393  static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
394  return {*this, int(i)};
395  }
// Const subscript: returns the entry by value via get().
396  Vc_INTRINSIC value_type operator[](size_t i) const noexcept
397  {
398  return get(*this, int(i));
399  }
400
// Write-masking: v(k) yields a Common::WriteMaskedVector bound to *this and mask k.
401  Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(const mask_type &k)
402  {
403  return {*this, k};
404  }
405
// Masked assignment, forwarded to the wrapped vector with the mask's underlying
// representation.
406  Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k)
407  {
408  data.assign(v.data, internal_data(k));
409  }
410 
411  // reductions ////////////////////////////////////////////////////////
// Generates, for each reduction name_, an unmasked member and a masked member that
// forward to the same-named reduction of the wrapped vector (the masked form passes
// internal_data(mask)). Expanded for min, max, product and sum.
412 #define Vc_REDUCTION_FUNCTION_(name_) \
413  Vc_INTRINSIC Vc_PURE value_type name_() const { return data.name_(); } \
414  Vc_INTRINSIC Vc_PURE value_type name_(mask_type mask) const \
415  { \
416  return data.name_(internal_data(mask)); \
417  } \
418  Vc_NOTHING_EXPECTING_SEMICOLON
419  Vc_REDUCTION_FUNCTION_(min);
420  Vc_REDUCTION_FUNCTION_(max);
421  Vc_REDUCTION_FUNCTION_(product);
422  Vc_REDUCTION_FUNCTION_(sum);
423 #undef Vc_REDUCTION_FUNCTION_
// Forwards to partialSum() of the wrapped vector and wraps the result.
424  Vc_INTRINSIC Vc_PURE fixed_size_simd<T, N> partialSum() const
425  {
426  return {private_init, data.partialSum()};
427  }
428 
// Forwards the callable f to apply() of the wrapped vector and wraps the result.
429  template <typename F> Vc_INTRINSIC fixed_size_simd<T, N> apply(F &&f) const
430  {
431  return {private_init, data.apply(std::forward<F>(f))};
432  }
// Masked variant.
// NOTE(review): k is handed to the wrapped vector as-is, whereas the other masked
// members of this class pass internal_data(k) - confirm the conversion is intended.
433  template <typename F> Vc_INTRINSIC fixed_size_simd<T, N> apply(F &&f, const mask_type &k) const
434  {
435  return {private_init, data.apply(std::forward<F>(f), k)};
436  }
437 
// Forwards to shifted(amount) of the wrapped vector.
438  Vc_INTRINSIC fixed_size_simd<T, N> shifted(int amount) const
439  {
440  return {private_init, data.shifted(amount)};
441  }
442
// Shift with a second operand: shiftIn (a SimdArray of possibly different size NN) is
// first simd_cast to VectorType and then passed on to the wrapped vector.
443  template <std::size_t NN>
444  Vc_INTRINSIC fixed_size_simd<T, N> shifted(int amount, const SimdArray<value_type, NN> &shiftIn)
445  const
446  {
447  return {private_init, data.shifted(amount, simd_cast<VectorType>(shiftIn))};
448  }
449
// Forwards to rotated(amount) of the wrapped vector.
450  Vc_INTRINSIC fixed_size_simd<T, N> rotated(int amount) const
451  {
452  return {private_init, data.rotated(amount)};
453  }
454 
// Deprecated member form of exponent(x).
// NOTE(review): the unqualified call exponent(data) appears inside a member that is
// itself named exponent, so name lookup may find the member first. copySign below
// uses the qualified Vc::copysign for its forwarding call - verify that this one
// resolves to the free function when the member is actually instantiated.
456  Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC fixed_size_simd<T, N> exponent() const
457  {
458  return {private_init, exponent(data)};
459  }
460
// The remaining members forward to the same-named member of the wrapped vector and
// wrap the result; the interleave functions pass the other operand's wrapped vector.
461  Vc_INTRINSIC fixed_size_simd<T, N> interleaveLow(SimdArray x) const
462  {
463  return {private_init, data.interleaveLow(x.data)};
464  }
465  Vc_INTRINSIC fixed_size_simd<T, N> interleaveHigh(SimdArray x) const
466  {
467  return {private_init, data.interleaveHigh(x.data)};
468  }
469
470  Vc_INTRINSIC fixed_size_simd<T, N> reversed() const
471  {
472  return {private_init, data.reversed()};
473  }
474
475  Vc_INTRINSIC fixed_size_simd<T, N> sorted() const
476  {
477  return {private_init, data.sorted()};
478  }
479 
// Generator constructor: enabled for types callable with a std::size_t argument that
// are not SIMD vectors; the generator is handed to the wrapped vector's constructor.
480  template <class G, class = decltype(std::declval<G>()(std::size_t())),
481  class = enable_if<!Traits::is_simd_vector<G>::value>>
482  Vc_INTRINSIC SimdArray(const G &gen) : data(gen)
483  {
484  }
// Static factory equivalent, built on VectorType::generate.
485  template <typename G> static Vc_INTRINSIC fixed_size_simd<T, N> generate(const G &gen)
486  {
487  return {private_init, VectorType::generate(gen)};
488  }
489
// Deprecated member form; forwards to Vc::copysign on the two wrapped vectors.
490  Vc_DEPRECATED("use copysign(x, y) instead")
491  Vc_INTRINSIC fixed_size_simd<T, N> copySign(const SimdArray &x) const
492  {
493  return {private_init, Vc::copysign(data, x.data)};
494  }
495 
496  friend VectorType &internal_data<>(SimdArray &x);
497  friend const VectorType &internal_data<>(const SimdArray &x);
498 
500  Vc_INTRINSIC SimdArray(private_init_t, VectorType &&x) : data(std::move(x)) {}
501 
502  Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type));
503 
504 private:
505  // The alignas attribute attached to the class declaration above is ignored by ICC
506  // 17.0.0 (at least). So just move the alignas attribute down here where it works for
507  // all compilers.
508  alignas(static_cast<std::size_t>(
509  Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(VectorType_) /
510  VectorType_::size()>::value)) storage_type data;
511 };
// Out-of-class definitions for the static constexpr std::size_t members declared in
// the atomic SimdArray specialization (Size and MemoryAlignment).
// The declarator on line 514 was missing from this listing, which left a template
// header with nothing to declare; it is restored as the MemoryAlignment definition,
// mirroring the Size definition on the line before.
512 template <typename T, std::size_t N, typename VectorType> constexpr std::size_t SimdArray<T, N, VectorType, N>::Size;
513 template <typename T, std::size_t N, typename VectorType>
514 constexpr std::size_t SimdArray<T, N, VectorType, N>::MemoryAlignment;
// Accessors for the wrapped vector of an atomic SimdArray (mutable and const). Both
// are declared friends inside the class so they can reach the private `data` member.
// The Vc_INTRINSIC attribute is left off when Vc_MSVC is defined.
515 template <typename T, std::size_t N, typename VectorType>
516 #ifndef Vc_MSVC
517 Vc_INTRINSIC
518 #endif
519 VectorType &internal_data(SimdArray<T, N, VectorType, N> &x)
520 {
521  return x.data;
522 }
523 template <typename T, std::size_t N, typename VectorType>
524 #ifndef Vc_MSVC
525 Vc_INTRINSIC
526 #endif
527 const VectorType &internal_data(const SimdArray<T, N, VectorType, N> &x)
528 {
529  return x.data;
530 }
531 
532 // unwrap {{{2
// unwrap: strips the SimdArray wrapper from an argument where there is one.
// Generic fallback - returns a copy of the argument unchanged.
533 template <class T> Vc_INTRINSIC T unwrap(const T &x) { return x; }
534
// Atomic SimdArray - returns a copy of the wrapped vector.
535 template <class T, size_t N, class V>
536 Vc_INTRINSIC V unwrap(const SimdArray<T, N, V, N> &x)
537 {
538  return internal_data(x);
539 }
540
// Segment - converts via to_fixed_size() and unwraps the result. The declared return
// type is that of to_fixed_size() itself.
541 template <class T, size_t Pieces, size_t Index>
542 Vc_INTRINSIC auto unwrap(const Common::Segment<T, Pieces, Index> &x)
543  -> decltype(x.to_fixed_size())
544 {
545  return unwrap(x.to_fixed_size());
546 }
547 
548 // gatherImplementation {{{2
// Out-of-class definitions of the atomic SimdArray's gatherImplementation members.
// Both rebuild the gather arguments with unwrapped indexes (Common::make_gather) and
// forward them to gather() of the wrapped vector; the second overload also passes the
// mask along.
// The declarator lines (551 and 558) were missing from this listing and are restored
// from the section heading, the two template headers and the use of the member `data`.
// NOTE(review): the specifiers in front of the name (Vc_INTRINSIC void) must agree
// with the declaration pulled in through gatherinterface.h - confirm.
549 template <typename T, std::size_t N, typename VectorType>
550 template <class MT, class IT, int Scale>
551 Vc_INTRINSIC void SimdArray<T, N, VectorType, N>::gatherImplementation(
552  const Common::GatherArguments<MT, IT, Scale> &args)
553 {
554  data.gather(Common::make_gather<Scale>(args.address, unwrap(args.indexes)));
555 }
556 template <typename T, std::size_t N, typename VectorType>
557 template <class MT, class IT, int Scale>
558 Vc_INTRINSIC void SimdArray<T, N, VectorType, N>::gatherImplementation(
559  const Common::GatherArguments<MT, IT, Scale> &args, MaskArgument mask)
560 {
561  data.gather(Common::make_gather<Scale>(args.address, unwrap(args.indexes)),
562  mask);
563 }
564 
565 // scatterImplementation {{{2
// Out-of-class definitions of the atomic SimdArray's scatterImplementation members.
// Both unwrap the index argument and forward to scatter() of the wrapped vector; the
// second overload also passes the mask along.
// The declarator lines (568 and 575) were missing from this listing. They are restored
// from the section heading and the template headers; the first parameter is `MT *mem`
// because the body uses `mem` and MT is otherwise unused.
// NOTE(review): the specifiers in front of the name (Vc_INTRINSIC void) must agree
// with the declaration pulled in through scatterinterface.h - confirm.
566 template <typename T, std::size_t N, typename VectorType>
567 template <typename MT, typename IT>
568 Vc_INTRINSIC void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
569  IT &&indexes) const
570 {
571  data.scatter(mem, unwrap(std::forward<IT>(indexes)));
572 }
573 template <typename T, std::size_t N, typename VectorType>
574 template <typename MT, typename IT>
575 Vc_INTRINSIC void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
576  IT &&indexes,
577  MaskArgument mask) const
578 {
579  data.scatter(mem, unwrap(std::forward<IT>(indexes)), mask);
580 }
581 
582 // generic SimdArray {{{1
615 template <typename T, size_t N, typename V, size_t Wt> class SimdArray
616 {
617  static_assert(std::is_same<T, double>::value ||
618  std::is_same<T, float>::value ||
619  std::is_same<T, int32_t>::value ||
620  std::is_same<T, uint32_t>::value ||
621  std::is_same<T, int16_t>::value ||
622  std::is_same<T, uint16_t>::value, "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }");
623  static_assert(
624  std::is_same<V, typename Common::select_best_vector_type<T, N>::type>::value &&
625  V::size() == Wt,
626  "ERROR: leave the third and fourth template parameters with their defaults. They "
627  "are implementation details.");
628  static_assert(
629  // either the EntryType and VectorEntryType of the main V are equal
630  std::is_same<typename V::EntryType, typename V::VectorEntryType>::value ||
631  // or N is a multiple of V::size()
632  (N % V::size() == 0),
633  "SimdArray<(un)signed short, N> on MIC only works correctly for N = k * "
634  "MIC::(u)short_v::size(), i.e. k * 16.");
635 
636  using my_traits = SimdArrayTraits<T, N>;
637  static constexpr std::size_t N0 = my_traits::N0;
638  static constexpr std::size_t N1 = my_traits::N1;
639  using Split = Common::Split<N0>;
640  template <typename U, std::size_t K> using CArray = U[K];
641 
642 public:
643  static constexpr bool is_atomic = false;
644  using storage_type0 = typename my_traits::storage_type0;
645  using storage_type1 = typename my_traits::storage_type1;
646  static_assert(storage_type0::size() == N0, "");
647 
651  using vector_type = V;
652  using vectorentry_type = typename storage_type0::vectorentry_type;
653  typedef vectorentry_type alias_type Vc_MAY_ALIAS;
654 
656  using value_type = T;
657 
660 
663 
674  static constexpr std::size_t size() { return N; }
675 
677  using Mask = mask_type;
679  using MaskType = Mask;
680  using MaskArgument = const MaskType &;
681  using VectorEntryType = vectorentry_type;
686  using AsArg = const SimdArray &;
687 
688  using reference = Detail::ElementReference<SimdArray>;
689 
691  static constexpr std::size_t MemoryAlignment =
695 
698 
// Named factories of the generic SimdArray. Zero/One/IndexesFromZero construct through
// the matching tag constructor; Random goes through fromOperation with
// Common::Operations::random.
// The signature of IndexesFromZero (line 712) was missing from this listing, leaving a
// function body without a declarator; it is restored to match its three siblings and
// the tag used in its body.
700  static Vc_INTRINSIC fixed_size_simd<T, N> Zero()
701  {
702  return SimdArray(Vc::Zero);
703  }
704
706  static Vc_INTRINSIC fixed_size_simd<T, N> One()
707  {
708  return SimdArray(Vc::One);
709  }
710
712  static Vc_INTRINSIC fixed_size_simd<T, N> IndexesFromZero()
713  {
714  return SimdArray(Vc::IndexesFromZero);
715  }
716
718  static Vc_INTRINSIC fixed_size_simd<T, N> Random()
719  {
720  return fromOperation(Common::Operations::random());
721  }
722 
// Generator constructor: enabled for types callable with a std::size_t argument that
// are not SIMD vectors. The first half receives gen directly; the second half receives
// a lambda that shifts the index by the size of the first half before calling gen.
723  template <class G, class = decltype(std::declval<G>()(std::size_t())),
724  class = enable_if<!Traits::is_simd_vector<G>::value>>
725  Vc_INTRINSIC SimdArray(const G &gen)
726  : data0(gen), data1([&](std::size_t i) { return gen(i + storage_type0::size()); })
727  {
728  }
729
// Static factory equivalent. The first half is generated into a named temporary so
// that it is evaluated before the second half (see the compiler note below).
731  template <typename G> static Vc_INTRINSIC fixed_size_simd<T, N> generate(const G &gen) // {{{2
732  {
733  auto tmp = storage_type0::generate(gen); // GCC bug: the order of evaluation in
734  // an initializer list is well-defined
735  // (front to back), but GCC 4.8 doesn't
736  // implement this correctly. Therefore
737  // we enforce correct order.
738  return {std::move(tmp),
739  storage_type1::generate([&](std::size_t i) { return gen(i + N0); })};
740  }
742 
745 
747  SimdArray() = default;
749 
752 
// Broadcast constructor: both halves are initialized from the same scalar.
754  Vc_INTRINSIC SimdArray(value_type a) : data0(a), data1(a) {}
// Additional broadcast overload that accepts exactly `int` when value_type is not int;
// the argument is cast to value_type and forwarded to the constructor above.
755  template <
756  typename U,
757  typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
758  SimdArray(U a)
759  : SimdArray(static_cast<value_type>(a))
760  {
761  }
763 
764  // default copy ctor/operator
765  SimdArray(const SimdArray &) = default;
766  SimdArray(SimdArray &&) = default;
767  SimdArray &operator=(const SimdArray &) = default;
768 
769  // load ctor
// Load constructor: enabled for pointers to arithmetic types together with a valid
// load/store flag type. The first half loads from mem, the second half from mem
// advanced by the number of entries in the first half; both use the same flags.
770  template <typename U, typename Flags = DefaultLoadTag,
771  typename = enable_if<std::is_arithmetic<U>::value &&
772  Traits::is_load_store_flag<Flags>::value>>
773  explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = Flags())
774  : data0(mem, f), data1(mem + storage_type0::size(), f)
775  {
776  }
777 
778 // MSVC does overload resolution differently and takes the const U *mem overload (I hope)
779 #ifndef Vc_MSVC
780 
// Load constructors taking a reference to a C array (non-const and const element
// access), compiled only when Vc_MSVC is not defined. The halves load from &mem[0]
// and from &mem[size of the first half].
// NOTE(review): Extent is deduced but not compared against size() in these bodies.
786  template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
787  typename = enable_if<std::is_arithmetic<U>::value &&
788  Traits::is_load_store_flag<Flags>::value>>
789  explicit Vc_INTRINSIC SimdArray(CArray<U, Extent> &mem, Flags f = Flags())
790  : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
791  {
792  }
796  template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
797  typename = enable_if<std::is_arithmetic<U>::value &&
798  Traits::is_load_store_flag<Flags>::value>>
799  explicit Vc_INTRINSIC SimdArray(const CArray<U, Extent> &mem, Flags f = Flags())
800  : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
801  {
802  }
803 #endif
804 
805  // initializer list
// Construct from an initializer list: both halves do an unaligned load, the second
// one starting storage_type0::size() entries into the list.
// NOTE(review): the size check is a Vc_ASSERT that runs after both loads have already
// read from the list; a list shorter than size() is therefore read past its end.
806  Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
807  : data0(init.begin(), Vc::Unaligned)
808  , data1(init.begin() + storage_type0::size(), Vc::Unaligned)
809  {
810  Vc_ASSERT(init.size() == size());
811  }
812 
813 #include "gatherinterface.h"
814 #include "scatterinterface.h"
815 
// Tag constructors. Zero default-constructs both halves; One forwards the tag to both.
816  explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data0(), data1() {}
817  explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data0(o), data1(o) {}
// IndexesFromZero: the first half gets the plain tag, the second half gets the tag
// wrapped in AddOffset with the size of the first half as offset.
818  explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i)
819  : data0(i)
820  , data1(Common::AddOffset<VectorSpecialInitializerIndexesFromZero,
821  storage_type0::size()>())
822  {
823  }
// IndexesFromZero with an incoming Offset: the first half receives the offset tag
// unchanged, the second half receives Offset plus the size of the first half.
824  template <size_t Offset>
825  explicit Vc_INTRINSIC SimdArray(
826  Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset> i)
827  : data0(i)
828  , data1(Common::AddOffset<VectorSpecialInitializerIndexesFromZero,
829  storage_type0::size() + Offset>())
830  {
831  }
832 
833  // explicit casts
834  template <class W, class = enable_if<
837  !(std::is_convertible<Traits::entry_type_of<W>, T>::value &&
839  Vc_INTRINSIC explicit SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
840  {
841  }
842 
843  // implicit casts
844  template <class W, class = enable_if<
846  Traits::simd_vector_size<W>::value == N &&
847  std::is_convertible<Traits::entry_type_of<W>, T>::value)>,
848  class = W>
849  Vc_INTRINSIC SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
850  {
851  }
852 
// Construct from a Common::Segment by subdividing it: the segment (Index of Pieces)
// over x.data is re-expressed as the two segments 2*Index and 2*Index+1 of 2*Pieces,
// which initialize the first and second half respectively.
853  template <class W, std::size_t Pieces, std::size_t Index>
854  Vc_INTRINSIC SimdArray(Common::Segment<W, Pieces, Index> &&x)
855  : data0(Common::Segment<W, 2 * Pieces, 2 * Index>{x.data})
856  , data1(Common::Segment<W, 2 * Pieces, 2 * Index + 1>{x.data})
857  {
858  }
859 
860  // implicit conversion to Vector<U, AnyAbi> if Vector<U, AnyAbi>::size() == N and
861  // T is implicitly convertible to U (the fixed_size ABI is excluded here)
// Both halves are combined into the target vector by a two-argument simd_cast.
862  template <typename U, typename A,
863  typename =
864  enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
865  !std::is_same<A, simd_abi::fixed_size<N>>::value>>
866  operator Vector<U, A>() const
867  {
868  auto r = simd_cast<Vector<U, A>>(data0, data1);
869  return r;
870  }
// Conversions to fixed_size_simd<T, N>, implemented as a static_cast of *this to a
// reference to that type (mutable and const variants).
871  Vc_INTRINSIC operator fixed_size_simd<T, N> &()
872  {
873  return static_cast<fixed_size_simd<T, N> &>(*this);
874  }
875  Vc_INTRINSIC operator const fixed_size_simd<T, N> &() const
876  {
877  return static_cast<const fixed_size_simd<T, N> &>(*this);
878  }
879 
881 
// Each member below applies the same-named operation to both halves. The masked
// variants split the mask with Split::lo / Split::hi so that each half receives its
// own part of the mask.
882  Vc_INTRINSIC void setZero()
883  {
884  data0.setZero();
885  data1.setZero();
886  }
887  Vc_INTRINSIC void setZero(const mask_type &k)
888  {
889  data0.setZero(Split::lo(k));
890  data1.setZero(Split::hi(k));
891  }
892  Vc_INTRINSIC void setZeroInverted()
893  {
894  data0.setZeroInverted();
895  data1.setZeroInverted();
896  }
897  Vc_INTRINSIC void setZeroInverted(const mask_type &k)
898  {
899  data0.setZeroInverted(Split::lo(k));
900  data1.setZeroInverted(Split::hi(k));
901  }
902
903
904  Vc_INTRINSIC void setQnan() {
905  data0.setQnan();
906  data1.setQnan();
907  }
908  Vc_INTRINSIC void setQnan(const mask_type &m) {
909  data0.setQnan(Split::lo(m));
910  data1.setQnan(Split::hi(m));
911  }
912 
// Internal helper: builds the result from two recursive fromOperation calls, one per
// half. Every argument is split with Split::lo for the first half and Split::hi for
// the second; only the second use forwards the arguments (see the inline note).
// The line opening the declaration of `r` (917) was missing from this listing, so the
// braced initializer had nothing to initialize and `r` was returned undeclared; it is
// restored with the function's return type.
914  template <typename Op, typename... Args>
915  static Vc_INTRINSIC fixed_size_simd<T, N> fromOperation(Op op, Args &&... args)
916  {
917  fixed_size_simd<T, N> r = {
918  storage_type0::fromOperation(op, Split::lo(args)...), // no forward here - it
919  // could move and thus
920  // break the next line
921  storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
922  return r;
923  }
924 
// Result-less counterpart of fromOperation: invokes callOperation on both halves with
// the low / high parts of every argument. The arguments are forwarded only in the
// second call, after the first call has finished using them.
926  template <typename Op, typename... Args>
927  static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
928  {
929  storage_type0::callOperation(op, Split::lo(args)...);
930  storage_type1::callOperation(op, Split::hi(std::forward<Args>(args))...);
931  }
932 
933 
// load: the first half loads from mem, the second half from mem advanced by the size
// of the first half. Additional arguments are split per half with Split::lo/hi.
934  template <typename U, typename... Args> Vc_INTRINSIC void load(const U *mem, Args &&... args)
935  {
936  data0.load(mem, Split::lo(args)...); // no forward here - it could move and thus
937  // break the next line
938  data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
939  }
940
// store: mirror image of load, writing the two halves to mem and
// mem + storage_type0::size().
941  template <typename U, typename... Args> Vc_INTRINSIC void store(U *mem, Args &&... args) const
942  {
943  data0.store(mem, Split::lo(args)...); // no forward here - it could move and thus
944  // break the next line
945  data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
946  }
947 
// Unary operators: each applies the operator to both halves and builds the result from
// the two partial results. operator! yields a mask_type; unary plus returns a copy of
// *this.
948  Vc_INTRINSIC mask_type operator!() const
949  {
950  return {!data0, !data1};
951  }
952
953  Vc_INTRINSIC fixed_size_simd<T, N> operator-() const
954  {
955  return {-data0, -data1};
956  }
957
959  Vc_INTRINSIC fixed_size_simd<T, N> operator+() const { return *this; }
960
961  Vc_INTRINSIC fixed_size_simd<T, N> operator~() const
962  {
963  return {~data0, ~data1};
964  }
965 
966  // left/right shift operators {{{2
// Shifts by a scalar amount, applied to both halves. All four overloads are enabled
// only when both the element type T and the shift-count type U are integral.
967  template <typename U,
968  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
969  Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator<<(U x) const
970  {
971  return {data0 << x, data1 << x};
972  }
973  template <typename U,
974  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
975  Vc_INTRINSIC fixed_size_simd<T, N> &operator<<=(U x)
976  {
977  data0 <<= x;
978  data1 <<= x;
979  return *this;
980  }
981  template <typename U,
982  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
983  Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator>>(U x) const
984  {
985  return {data0 >> x, data1 >> x};
986  }
987  template <typename U,
988  typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
989  Vc_INTRINSIC fixed_size_simd<T, N> &operator>>=(U x)
990  {
991  data0 >>= x;
992  data1 >>= x;
993  return *this;
994  }
995 
996  // binary operators {{{2
// Generates the compound assignment operators (`op=`) taking another SimdArray: each
// applies `op=` half by half and returns *this. The macro is expanded once per
// operator listed by Vc_ALL_ARITHMETICS, Vc_ALL_BINARY and Vc_ALL_SHIFTS and is
// undefined again right afterwards.
997 #define Vc_BINARY_OPERATOR_(op) \
998  Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs) \
999  { \
1000  data0 op## = rhs.data0; \
1001  data1 op## = rhs.data1; \
1002  return *this; \
1003  }
1004  Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
1005  Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
1006  Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
1007 #undef Vc_BINARY_OPERATOR_
1008 
1009  // operator[] {{{2
1012 
1013 private:
1014  friend reference;
// Element accessors used by the `reference` proxy type (declared a friend just above).
// Both address the whole object as an array of alias_type (the may-alias typedef of
// vectorentry_type declared earlier in this class) and index entry i in it.
// NOTE(review): this presumes the two halves are laid out back to back without
// padding between entries - confirm against the storage declaration.
1015  Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
1016  {
1017  return reinterpret_cast<const alias_type *>(&o)[i];
1018  }
// set is noexcept exactly when assigning v to a value_type lvalue is noexcept.
1019  template <typename U>
1020  Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
1021  noexcept(std::declval<value_type &>() = v))
1022  {
1023  reinterpret_cast<alias_type *>(&o)[i] = v;
1024  }
1025 
1026 public:
1028 
// Mutable subscript: returns a `reference` proxy bound to (*this, i). The
// static_assert verifies that constructing the proxy is itself noexcept, which the
// noexcept specification of this operator relies on.
1034  Vc_INTRINSIC reference operator[](size_t i) noexcept
1035  {
1036  static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
1037  return {*this, int(i)};
1038  }
1039
// Const subscript: returns the entry by value via get().
1041  Vc_INTRINSIC value_type operator[](size_t index) const noexcept
1042  {
1043  return get(*this, int(index));
1044  }
1046
1047  // operator(){{{2
// Write-masking: v(mask) yields a Common::WriteMaskedVector bound to *this and mask.
1049  Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
1050  const mask_type &mask)
1051  {
1052  return {*this, mask};
1053  }
1054
// Masked assignment, done half by half with the matching half of the mask
// (internal_data0 / internal_data1).
1056  Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k) //{{{2
1057  {
1058  data0.assign(v.data0, internal_data0(k));
1059  data1.assign(v.data1, internal_data1(k));
1060  }
1061 
1062  // reductions {{{2
// Generates the reduction members name_() and name_(mask) for the generic SimdArray.
//  - name_##_impl() has two mutually exclusive SFINAE overloads. When both halves have
//    the same Size, they are first combined element-wise with binary_fun_ and the
//    combined vector is reduced once. Otherwise each half is reduced on its own and
//    the two scalars are combined with scalar_fun_.
//  - name_() simply calls name_##_impl().
//  - name_(mask) splits the mask per half. If the low part is empty, only the high
//    half is reduced; if the high part is empty, only the low half; otherwise both
//    masked reductions are combined with scalar_fun_.
// Expanded for min, max, product and sum, then undefined again.
1063 #define Vc_REDUCTION_FUNCTION_(name_, binary_fun_, scalar_fun_) \
1064 private: \
1065  template <typename ForSfinae = void> \
1066  Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
1067  storage_type0::Size == storage_type1::Size, \
1068  value_type> name_##_impl() const \
1069  { \
1070  return binary_fun_(data0, data1).name_(); \
1071  } \
1072  \
1073  template <typename ForSfinae = void> \
1074  Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
1075  storage_type0::Size != storage_type1::Size, \
1076  value_type> name_##_impl() const \
1077  { \
1078  return scalar_fun_(data0.name_(), data1.name_()); \
1079  } \
1080  \
1081 public: \
1082  \
1083  Vc_INTRINSIC value_type name_() const { return name_##_impl(); } \
1084  \
1085  Vc_INTRINSIC value_type name_(const mask_type &mask) const \
1086  { \
1087  if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) { \
1088  return data1.name_(Split::hi(mask)); \
1089  } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) { \
1090  return data0.name_(Split::lo(mask)); \
1091  } else { \
1092  return scalar_fun_(data0.name_(Split::lo(mask)), \
1093  data1.name_(Split::hi(mask))); \
1094  } \
1095  } \
1096  Vc_NOTHING_EXPECTING_SEMICOLON
1097  Vc_REDUCTION_FUNCTION_(min, Vc::min, std::min);
1098  Vc_REDUCTION_FUNCTION_(max, Vc::max, std::max);
1099  Vc_REDUCTION_FUNCTION_(product, internal::product_helper_, internal::product_helper_);
1100  Vc_REDUCTION_FUNCTION_(sum, internal::sum_helper_, internal::sum_helper_);
1101 #undef Vc_REDUCTION_FUNCTION_
1102  Vc_INTRINSIC Vc_PURE fixed_size_simd<T, N> partialSum() const //{{{2
1104  {
1105  auto ps0 = data0.partialSum();
1106  auto tmp = data1;
1107  tmp[0] += ps0[data0.size() - 1];
1108  return {std::move(ps0), tmp.partialSum()};
1109  }
1110 
1111  // apply {{{2
1113  template <typename F> inline fixed_size_simd<T, N> apply(F &&f) const
1114  {
1115  return {data0.apply(f), data1.apply(f)};
1116  }
1118  template <typename F>
1119  inline fixed_size_simd<T, N> apply(F &&f, const mask_type &k) const
1120  {
1121  return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))};
1122  }
1123 
1124  // shifted {{{2
1126  inline fixed_size_simd<T, N> shifted(int amount) const
1127  {
1128  constexpr int SSize = Size;
1129  constexpr int SSize0 = storage_type0::Size;
1130  constexpr int SSize1 = storage_type1::Size;
1131  if (amount == 0) {
1132  return *this;
1133  }
1134  if (amount < 0) {
1135  if (amount > -SSize0) {
1136  return {data0.shifted(amount), data1.shifted(amount, data0)};
1137  }
1138  if (amount == -SSize0) {
1139  return {storage_type0(0), simd_cast<storage_type1>(data0)};
1140  }
1141  if (amount < -SSize0) {
1142  return {storage_type0(0), simd_cast<storage_type1>(data0.shifted(
1143  amount + SSize0))};
1144  }
1145  return Zero();
1146  } else {
1147  if (amount >= SSize) {
1148  return Zero();
1149  } else if (amount >= SSize0) {
1150  return {
1151  simd_cast<storage_type0>(data1).shifted(amount - SSize0),
1152  storage_type1(0)};
1153  } else if (amount >= SSize1) {
1154  return {data0.shifted(amount, data1), storage_type1(0)};
1155  } else {
1156  return {data0.shifted(amount, data1), data1.shifted(amount)};
1157  }
1158  }
1159  }
1160 
1161  template <std::size_t NN>
1162  inline enable_if<
1163  !(std::is_same<storage_type0, storage_type1>::value && // not bisectable
1164  N == NN),
1166  shifted(int amount, const SimdArray<value_type, NN> &shiftIn) const
1167  {
1168  constexpr int SSize = Size;
1169  if (amount < 0) {
1170  return fixed_size_simd<T, N>([&](int i) -> value_type {
1171  i += amount;
1172  if (i >= 0) {
1173  return operator[](i);
1174  } else if (i >= -SSize) {
1175  return shiftIn[i + SSize];
1176  }
1177  return 0;
1178  });
1179  }
1180  return fixed_size_simd<T, N>([&](int i) -> value_type {
1181  i += amount;
1182  if (i < SSize) {
1183  return operator[](i);
1184  } else if (i < 2 * SSize) {
1185  return shiftIn[i - SSize];
1186  }
1187  return 0;
1188  });
1189  }
1190 
1191 private:
1192  // workaround for MSVC not understanding the simpler and shorter expression of the boolean
1193  // expression directly in the enable_if below
1194  template <std::size_t NN> struct bisectable_shift
1195  : public std::integral_constant<bool,
1196  std::is_same<storage_type0, storage_type1>::value && // bisectable
1197  N == NN>
1198  {
1199  };
1200 
1201 public:
1202  template <std::size_t NN>
1204  enable_if<bisectable_shift<NN>::value, int> amount,
1205  const SimdArray<value_type, NN> &shiftIn) const
1206  {
1207  constexpr int SSize = Size;
1208  if (amount < 0) {
1209  if (amount > -static_cast<int>(storage_type0::Size)) {
1210  return {data0.shifted(amount, internal_data1(shiftIn)),
1211  data1.shifted(amount, data0)};
1212  }
1213  if (amount == -static_cast<int>(storage_type0::Size)) {
1214  return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
1215  }
1216  if (amount > -SSize) {
1217  return {
1218  internal_data1(shiftIn)
1219  .shifted(amount + static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
1220  data0.shifted(amount + static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
1221  }
1222  if (amount == -SSize) {
1223  return shiftIn;
1224  }
1225  if (amount > -2 * SSize) {
1226  return shiftIn.shifted(amount + SSize);
1227  }
1228  }
1229  if (amount == 0) {
1230  return *this;
1231  }
1232  if (amount < static_cast<int>(storage_type0::Size)) {
1233  return {data0.shifted(amount, data1),
1234  data1.shifted(amount, internal_data0(shiftIn))};
1235  }
1236  if (amount == static_cast<int>(storage_type0::Size)) {
1237  return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
1238  }
1239  if (amount < SSize) {
1240  return {data1.shifted(amount - static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
1241  internal_data0(shiftIn)
1242  .shifted(amount - static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
1243  }
1244  if (amount == SSize) {
1245  return shiftIn;
1246  }
1247  if (amount < 2 * SSize) {
1248  return shiftIn.shifted(amount - SSize);
1249  }
1250  return Zero();
1251  }
1252 
1253  // rotated {{{2
1255  Vc_INTRINSIC fixed_size_simd<T, N> rotated(int amount) const
1256  {
1257  amount %= int(size());
1258  if (amount == 0) {
1259  return *this;
1260  } else if (amount < 0) {
1261  amount += size();
1262  }
1263 
1264 #ifdef Vc_MSVC
1265  // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use store
1266  // ->
1267  // load to implement the function instead.
1268  alignas(MemoryAlignment) T tmp[N + data0.size()];
1269  data0.store(&tmp[0], Vc::Aligned);
1270  data1.store(&tmp[data0.size()], Vc::Aligned);
1271  data0.store(&tmp[N], Vc::Unaligned);
1273  r.data0.load(&tmp[amount], Vc::Unaligned);
1274  r.data1.load(&tmp[(amount + data0.size()) % size()], Vc::Unaligned);
1275  return r;
1276 #else
1277  auto &&d0cvtd = simd_cast<storage_type1>(data0);
1278  auto &&d1cvtd = simd_cast<storage_type0>(data1);
1279  constexpr int size0 = storage_type0::size();
1280  constexpr int size1 = storage_type1::size();
1281 
1282  if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
1283  return {std::move(d1cvtd), std::move(d0cvtd)};
1284  } else if (amount < size1) {
1285  return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
1286  } else if (amount == size1) {
1287  return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
1288  } else if (int(size()) - amount < size1) {
1289  return {data0.shifted(amount - int(size()), d1cvtd.shifted(size1 - size0)),
1290  data1.shifted(amount - int(size()), data0.shifted(size0 - size1))};
1291  } else if (int(size()) - amount == size1) {
1292  return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
1293  simd_cast<storage_type1>(data0.shifted(size0 - size1))};
1294  } else if (amount <= size0) {
1295  return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1296  simd_cast<storage_type1>(data0.shifted(amount - size1))};
1297  } else {
1298  return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1299  simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
1300  }
1301  return *this;
1302 #endif
1303  }
1304 
1305  // interleaveLow/-High {{{2
1307  Vc_INTRINSIC fixed_size_simd<T, N> interleaveLow(const SimdArray &x) const
1308  {
1309  // return data0[0], x.data0[0], data0[1], x.data0[1], ...
1310  return {data0.interleaveLow(x.data0),
1311  simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
1312  }
1314  Vc_INTRINSIC fixed_size_simd<T, N> interleaveHigh(const SimdArray &x) const
1315  {
1316  return interleaveHighImpl(
1317  x,
1318  std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
1319  }
1320 
1321 private:
1323  Vc_INTRINSIC fixed_size_simd<T, N> interleaveHighImpl(const SimdArray &x, std::true_type) const
1324  {
1325  return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
1326  }
1328  inline fixed_size_simd<T, N> interleaveHighImpl(const SimdArray &x, std::false_type) const
1329  {
1330  return {data0.interleaveHigh(x.data0)
1331  .shifted(storage_type1::Size,
1332  simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
1333  data1.interleaveHigh(x.data1)};
1334  }
1335 
1336 public:
1338  inline fixed_size_simd<T, N> reversed() const //{{{2
1339  {
1340  if (std::is_same<storage_type0, storage_type1>::value) {
1341  return {simd_cast<storage_type0>(data1).reversed(),
1342  simd_cast<storage_type1>(data0).reversed()};
1343  } else {
1344 #ifdef Vc_MSVC
1345  // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use
1346  // store
1347  // -> load to implement the function instead.
1348  alignas(MemoryAlignment) T tmp[N];
1349  data1.reversed().store(&tmp[0], Vc::Aligned);
1350  data0.reversed().store(&tmp[data1.size()], Vc::Unaligned);
1351  return fixed_size_simd<T, N>{&tmp[0], Vc::Aligned};
1352 #else
1353  return {data0.shifted(storage_type1::Size, data1).reversed(),
1354  simd_cast<storage_type1>(data0.reversed().shifted(
1355  storage_type0::Size - storage_type1::Size))};
1356 #endif
1357  }
1358  }
1360  inline fixed_size_simd<T, N> sorted() const //{{{2
1361  {
1362  return sortedImpl(
1363  std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
1364  }
1365 
1367  Vc_INTRINSIC fixed_size_simd<T, N> sortedImpl(std::true_type) const
1368  {
1369 #ifdef Vc_DEBUG_SORTED
1370  std::cerr << "-- " << data0 << data1 << '\n';
1371 #endif
1372  const auto a = data0.sorted();
1373  const auto b = data1.sorted().reversed();
1374  const auto lo = Vc::min(a, b);
1375  const auto hi = Vc::max(a, b);
1376  return {lo.sorted(), hi.sorted()};
1377  }
1378 
1380  Vc_INTRINSIC fixed_size_simd<T, N> sortedImpl(std::false_type) const
1381  {
1382  using SortableArray =
1384  auto sortable = simd_cast<SortableArray>(*this);
1385  for (std::size_t i = Size; i < SortableArray::Size; ++i) {
1386  using limits = std::numeric_limits<value_type>;
1387  if (limits::has_infinity) {
1388  sortable[i] = limits::infinity();
1389  } else {
1390  sortable[i] = std::numeric_limits<value_type>::max();
1391  }
1392  }
1393  return simd_cast<fixed_size_simd<T, N>>(sortable.sorted());
1394 
1395  /* The following implementation appears to be less efficient. But this may need further
1396  * work.
1397  const auto a = data0.sorted();
1398  const auto b = data1.sorted();
1399 #ifdef Vc_DEBUG_SORTED
1400  std::cerr << "== " << a << b << '\n';
1401 #endif
1402  auto aIt = Vc::begin(a);
1403  auto bIt = Vc::begin(b);
1404  const auto aEnd = Vc::end(a);
1405  const auto bEnd = Vc::end(b);
1406  return SimdArray::generate([&](std::size_t) {
1407  if (aIt == aEnd) {
1408  return *(bIt++);
1409  }
1410  if (bIt == bEnd) {
1411  return *(aIt++);
1412  }
1413  if (*aIt < *bIt) {
1414  return *(aIt++);
1415  } else {
1416  return *(bIt++);
1417  }
1418  });
1419  */
1420  }
1421 
1424 
1427  static constexpr std::size_t Size = size();
1428 
1430  Vc_DEPRECATED("use exponent(x) instead")
1431  Vc_INTRINSIC fixed_size_simd<T, N> exponent() const
1432  {
1433  return {exponent(data0), exponent(data1)};
1434  }
1435 
1437  Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
1438  {
1439  return {isnegative(data0), isnegative(data1)};
1440  }
1441 
1443  Vc_DEPRECATED("use copysign(x, y) instead")
1444  Vc_INTRINSIC fixed_size_simd<T, N> copySign(const SimdArray &x) const
1445  {
1446  return {Vc::copysign(data0, x.data0),
1447  Vc::copysign(data1, x.data1)};
1448  }
1450 
1451  // internal_data0/1 {{{2
1452  friend storage_type0 &internal_data0<>(SimdArray &x);
1453  friend storage_type1 &internal_data1<>(SimdArray &x);
1454  friend const storage_type0 &internal_data0<>(const SimdArray &x);
1455  friend const storage_type1 &internal_data1<>(const SimdArray &x);
1456 
// Constructs the object from its two storage halves: x is moved into data0 and y is
// moved into data1.
1458  Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y) //{{{2
1459  : data0(std::move(x)), data1(std::move(y))
1460  {
1461  }
1462 
1463  Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type0));
1464 
1465 private: //{{{2
1466  // The alignas attribute attached to the class declaration above is ignored by ICC
1467  // 17.0.0 (at least). So just move the alignas attribute down here where it works for
1468  // all compilers.
1469  alignas(static_cast<std::size_t>(
1470  Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(V) /
1471  V::size()>::value)) storage_type0 data0;
1472  storage_type1 data1;
1473 };
1474 #undef Vc_CURRENT_CLASS_NAME
1475 template <typename T, std::size_t N, typename V, std::size_t M>
1476 constexpr std::size_t SimdArray<T, N, V, M>::Size;
1477 template <typename T, std::size_t N, typename V, std::size_t M>
1478 constexpr std::size_t SimdArray<T, N, V, M>::MemoryAlignment;
1479 
1480 // gatherImplementation {{{2
1481 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1482 template <class MT, class IT, int Scale>
1484  const Common::GatherArguments<MT, IT, Scale> &args)
1485 {
1486  data0.gather(Common::make_gather<Scale>(
1487  args.address, Split::lo(Common::Operations::gather(), args.indexes)));
1488  data1.gather(Common::make_gather<Scale>(
1489  args.address, Split::hi(Common::Operations::gather(), args.indexes)));
1490 }
1491 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1492 template <class MT, class IT, int Scale>
1494  const Common::GatherArguments<MT, IT, Scale> &args, MaskArgument mask)
1495 {
1496  data0.gather(Common::make_gather<Scale>(
1497  args.address, Split::lo(Common::Operations::gather(), args.indexes)),
1498  Split::lo(mask));
1499  data1.gather(Common::make_gather<Scale>(
1500  args.address, Split::hi(Common::Operations::gather(), args.indexes)),
1501  Split::hi(mask));
1502 }
1503 
1504 // scatterImplementation {{{2
1505 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1506 template <typename MT, typename IT>
1508  IT &&indexes) const
1509 {
1510  data0.scatter(mem, Split::lo(Common::Operations::gather(),
1511  indexes)); // don't forward indexes - it could move and
1512  // thus break the next line
1513  data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
1514 }
1515 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1516 template <typename MT, typename IT>
1518  IT &&indexes, MaskArgument mask) const
1519 {
1520  data0.scatter(mem, Split::lo(Common::Operations::gather(), indexes),
1521  Split::lo(mask)); // don't forward indexes - it could move and
1522  // thus break the next line
1523  data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
1524  Split::hi(mask));
1525 }
1526 
1527 // internal_data0/1 (SimdArray) {{{1
1529 template <typename T, std::size_t N, typename V, std::size_t M>
1530 #ifndef Vc_MSVC
1531 Vc_INTRINSIC
1532 #endif
1533 typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
1535 {
1536  return x.data0;
1537 }
1539 template <typename T, std::size_t N, typename V, std::size_t M>
1540 #ifndef Vc_MSVC
1541 Vc_INTRINSIC
1542 #endif
1543 typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
1545 {
1546  return x.data1;
1547 }
1549 template <typename T, std::size_t N, typename V, std::size_t M>
1550 #ifndef Vc_MSVC
1551 Vc_INTRINSIC
1552 #endif
1553 const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
1554  const SimdArray<T, N, V, M> &x)
1555 {
1556  return x.data0;
1557 }
1559 template <typename T, std::size_t N, typename V, std::size_t M>
1560 #ifndef Vc_MSVC
1561 Vc_INTRINSIC
1562 #endif
1563 const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
1564  const SimdArray<T, N, V, M> &x)
1565 {
1566  return x.data1;
1567 }
1568 
1569 // MSVC workaround for SimdArray(storage_type0, storage_type1) ctor{{{1
1570 // MSVC sometimes stores x to data1. By first broadcasting 0 and then assigning y
1571 // in the body the bug is suppressed.
#if defined Vc_MSVC && defined Vc_IMPL_SSE && !defined Vc_IMPL_AVX
// Explicit specialization of the two-halves constructor: data1 is first initialized
// with a broadcast 0 and only assigned y in the constructor body.
template <>
Vc_INTRINSIC SimdArray<double, 8>::SimdArray(fixed_size_simd<double, 4> &&x,
                                             fixed_size_simd<double, 4> &&y)
    : data0(x), data1(0)
{
    data1 = y;
}
#endif
1581 
1582 // binary operators {{{
1583 namespace Detail
1584 {
1585 #define Vc_FIXED_OP(op) \
1586  template <class T, int N, \
1587  class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type> \
1588  fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a, \
1589  const fixed_size_simd<T, N> &b) \
1590  { \
1591  return {private_init, internal_data(a) op internal_data(b)}; \
1592  } \
1593  template <class T, int N, \
1594  class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type, \
1595  class = T> \
1596  fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a, \
1597  const fixed_size_simd<T, N> &b) \
1598  { \
1599  return {internal_data0(a) op internal_data0(b), \
1600  internal_data1(a) op internal_data1(b)}; \
1601  }
1602 Vc_ALL_ARITHMETICS(Vc_FIXED_OP);
1603 Vc_ALL_BINARY(Vc_FIXED_OP);
1604 Vc_ALL_SHIFTS(Vc_FIXED_OP);
1605 #undef Vc_FIXED_OP
1606 #define Vc_FIXED_OP(op) \
1607  template <class T, int N, \
1608  class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type> \
1609  fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a, \
1610  const fixed_size_simd<T, N> &b) \
1611  { \
1612  return {private_init, internal_data(a) op internal_data(b)}; \
1613  } \
1614  template <class T, int N, \
1615  class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type, \
1616  class = T> \
1617  fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a, \
1618  const fixed_size_simd<T, N> &b) \
1619  { \
1620  return {internal_data0(a) op internal_data0(b), \
1621  internal_data1(a) op internal_data1(b)}; \
1622  }
1623 Vc_ALL_COMPARES(Vc_FIXED_OP);
1624 #undef Vc_FIXED_OP
1625 } // namespace Detail
1626 
1627 // }}}
1628 // binary operators {{{1
1629 namespace result_vector_type_internal
1630 {
/// Strips a reference and then top-level const/volatile qualifiers from T.
template <typename T>
using remove_cvref =
    typename std::remove_cv<typename std::remove_reference<T>::type>::type;
1633 
/// True for integral types that are wider than int, and for long and unsigned long
/// regardless of their actual width.
template <typename T>
using is_integer_larger_than_int = std::integral_constant<
    bool, std::is_integral<T>::value &&
              (std::is_same<T, long>::value || std::is_same<T, unsigned long>::value ||
               (sizeof(T) > sizeof(int)))>;
1639 
1640 template <
1641  typename L, typename R,
1645  Traits::isSimdArray<R>::value) && // one of the operands must be a SimdArray
1646  !(Traits::is_fixed_size_simd<L>::value && // if both are fixed_size, use
1647  Traits::is_fixed_size_simd<R>::value) && // common/operators.h
1648  ((std::is_arithmetic<remove_cvref<L>>::value && // one of the operands is a
1649  !is_integer_larger_than_int<remove_cvref<L>>::value) || // scalar type
1650  (std::is_arithmetic<remove_cvref<R>>::value &&
1651  !is_integer_larger_than_int<remove_cvref<R>>::value) ||
1652  // or one of the operands is Vector<T> with Vector<T>::size() ==
1653  // SimdArray::size()
1655 struct evaluate;
1656 
1657 template <typename L, typename R, std::size_t N> struct evaluate<L, R, N, true>
1658 {
1659 private:
1660  using LScalar = Traits::entry_type_of<L>;
1661  using RScalar = Traits::entry_type_of<R>;
1662 
1663  template <bool B, typename T, typename F>
1664  using conditional = typename std::conditional<B, T, F>::type;
1665 
1666 public:
1667  // In principle we want the exact same rules for SimdArray<T> ⨉ SimdArray<U> as the standard
1668  // defines for T ⨉ U. BUT: short ⨉ short returns int (because all integral types smaller than
1669  // int are promoted to int before any operation). This would imply that SIMD types with integral
1670  // types smaller than int are more or less useless - and you could use SimdArray<int> from the
1671  // start. Therefore we special-case those operations where the scalar type of both operands is
1672  // integral and smaller than int.
1673  // In addition, there is no generic support for 64-bit int SIMD types. Therefore
1674  // promotion to a 64-bit integral type (including `long` because it can potentially have 64
1675  // bits) also is not done. But if one of the operands is a scalar type that is larger than int
1676  // then the operator is disabled altogether. We do not want an implicit demotion.
1677  using type = fixed_size_simd<
1678  conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
1679  sizeof(LScalar) < sizeof(int) &&
1680  sizeof(RScalar) < sizeof(int)),
1681  conditional<(sizeof(LScalar) == sizeof(RScalar)),
1682  conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
1683  conditional<(sizeof(LScalar) > sizeof(RScalar)), LScalar, RScalar>>,
1684  decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
1685  N>;
1686 };
1687 
1688 } // namespace result_vector_type_internal
1689 
// Result type of a binary operator with operand types L and R, as computed by
// result_vector_type_internal::evaluate. The alias only names a type when evaluate
// provides a nested `type` for L and R.
1690 template <typename L, typename R>
1691 using result_vector_type = typename result_vector_type_internal::evaluate<L, R>::type;
1692 
1693 #define Vc_BINARY_OPERATORS_(op_) \
1694  \
1695  template <typename L, typename R> \
1696  Vc_INTRINSIC result_vector_type<L, R> operator op_(L &&lhs, R &&rhs) \
1697  { \
1698  using Return = result_vector_type<L, R>; \
1699  return Vc::Detail::operator op_( \
1700  static_cast<const Return &>(std::forward<L>(lhs)), \
1701  static_cast<const Return &>(std::forward<R>(rhs))); \
1702  }
1703 
1720 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_);
1722 Vc_ALL_BINARY(Vc_BINARY_OPERATORS_);
1724 #undef Vc_BINARY_OPERATORS_
1725 #define Vc_BINARY_OPERATORS_(op_) \
1726  \
1727  template <typename L, typename R> \
1728  Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op_(L &&lhs, \
1729  R &&rhs) \
1730  { \
1731  using Promote = result_vector_type<L, R>; \
1732  return Promote(std::forward<L>(lhs)) op_ Promote(std::forward<R>(rhs)); \
1733  }
1734 
1751 Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_);
1754 #undef Vc_BINARY_OPERATORS_
1755 
1756 // math functions {{{1
// Generates the free function name_(x) twice: once for SimdArray<T, N, V, M> and once
// for fixed_size_simd<T, N>. Both build the result with fixed_size_simd<T, N>::
// fromOperation, passing the Common::Operations::Forward_<name_> tag object and x.
1757 #define Vc_FORWARD_UNARY_OPERATOR(name_) \
1758  \
1759  template <typename T, std::size_t N, typename V, std::size_t M> \
1760  inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x) \
1761  { \
1762  return fixed_size_simd<T, N>::fromOperation( \
1763  Common::Operations::Forward_##name_(), x); \
1764  } \
1765  template <class T, int N> \
1766  fixed_size_simd<T, N> name_(const fixed_size_simd<T, N> &x) \
1767  { \
1768  return fixed_size_simd<T, N>::fromOperation( \
1769  Common::Operations::Forward_##name_(), x); \
1770  } \
1771  Vc_NOTHING_EXPECTING_SEMICOLON
1772 
// Same as Vc_FORWARD_UNARY_OPERATOR, but the generated functions return
// fixed_size_simd_mask<T, N> (built via fixed_size_simd_mask<T, N>::fromOperation).
1773 #define Vc_FORWARD_UNARY_BOOL_OPERATOR(name_) \
1774  \
1775  template <typename T, std::size_t N, typename V, std::size_t M> \
1776  inline fixed_size_simd_mask<T, N> name_(const SimdArray<T, N, V, M> &x) \
1777  { \
1778  return fixed_size_simd_mask<T, N>::fromOperation( \
1779  Common::Operations::Forward_##name_(), x); \
1780  } \
1781  template <class T, int N> \
1782  fixed_size_simd_mask<T, N> name_(const fixed_size_simd<T, N> &x) \
1783  { \
1784  return fixed_size_simd_mask<T, N>::fromOperation( \
1785  Common::Operations::Forward_##name_(), x); \
1786  } \
1787  Vc_NOTHING_EXPECTING_SEMICOLON
1788 
// Generates the free function name_(x, y) for two SimdArray<T, N, V, M> operands of
// the same type; only the SimdArray overload is generated here.
1789 #define Vc_FORWARD_BINARY_OPERATOR(name_) \
1790  \
1791  template <typename T, std::size_t N, typename V, std::size_t M> \
1792  inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x, \
1793  const SimdArray<T, N, V, M> &y) \
1794  { \
1795  return fixed_size_simd<T, N>::fromOperation( \
1796  Common::Operations::Forward_##name_(), x, y); \
1797  } \
1798  Vc_NOTHING_EXPECTING_SEMICOLON
1799 
1804 Vc_FORWARD_UNARY_OPERATOR(abs);
1806 Vc_FORWARD_UNARY_OPERATOR(asin);
1807 Vc_FORWARD_UNARY_OPERATOR(atan);
1808 Vc_FORWARD_BINARY_OPERATOR(atan2);
1809 Vc_FORWARD_UNARY_OPERATOR(ceil);
1810 Vc_FORWARD_BINARY_OPERATOR(copysign);
1811 Vc_FORWARD_UNARY_OPERATOR(cos);
1812 Vc_FORWARD_UNARY_OPERATOR(exp);
1813 Vc_FORWARD_UNARY_OPERATOR(exponent);
1814 Vc_FORWARD_UNARY_OPERATOR(floor);
1816 template <typename T, std::size_t N>
1818  const SimdArray<T, N> &c)
1819 {
1820  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_fma(), a, b, c);
1821 }
1822 Vc_FORWARD_UNARY_BOOL_OPERATOR(isfinite);
1823 Vc_FORWARD_UNARY_BOOL_OPERATOR(isinf);
1824 Vc_FORWARD_UNARY_BOOL_OPERATOR(isnan);
1825 Vc_FORWARD_UNARY_BOOL_OPERATOR(isnegative);
1827 template <typename T, std::size_t N>
1829 {
1830  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_frexp(), x, e);
1831 }
1833 template <typename T, std::size_t N>
1835 {
1836  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_ldexp(), x, e);
1837 }
1838 Vc_FORWARD_UNARY_OPERATOR(log);
1839 Vc_FORWARD_UNARY_OPERATOR(log10);
1840 Vc_FORWARD_UNARY_OPERATOR(log2);
1841 Vc_FORWARD_UNARY_OPERATOR(reciprocal);
1842 Vc_FORWARD_UNARY_OPERATOR(round);
1843 Vc_FORWARD_UNARY_OPERATOR(rsqrt);
1844 Vc_FORWARD_UNARY_OPERATOR(sin);
1846 template <typename T, std::size_t N>
1848 {
1849  SimdArray<T, N>::callOperation(Common::Operations::Forward_sincos(), x, sin, cos);
1850 }
1851 Vc_FORWARD_UNARY_OPERATOR(sqrt);
1852 Vc_FORWARD_UNARY_OPERATOR(trunc);
1853 Vc_FORWARD_BINARY_OPERATOR(min);
1854 Vc_FORWARD_BINARY_OPERATOR(max);
1856 #undef Vc_FORWARD_UNARY_OPERATOR
1857 #undef Vc_FORWARD_UNARY_BOOL_OPERATOR
1858 #undef Vc_FORWARD_BINARY_OPERATOR
1859 
1860 // simd_cast {{{1
// Vc_DUMMY_ARG0..5: with MSVC each macro expands to one additional, unnamed and
// defaulted function parameter of a distinct type; with all other compilers they
// expand to nothing.
// NOTE(review): presumably used to make otherwise ambiguous overloads
// distinguishable for MSVC - confirm at the points of use.
1861 #ifdef Vc_MSVC
1862 #define Vc_DUMMY_ARG0 , int = 0
1863 #define Vc_DUMMY_ARG1 , long = 0
1864 #define Vc_DUMMY_ARG2 , short = 0
1865 #define Vc_DUMMY_ARG3 , char = '0'
1866 #define Vc_DUMMY_ARG4 , unsigned = 0u
1867 #define Vc_DUMMY_ARG5 , unsigned short = 0u
1868 #else
1869 #define Vc_DUMMY_ARG0
1870 #define Vc_DUMMY_ARG1
1871 #define Vc_DUMMY_ARG2
1872 #define Vc_DUMMY_ARG3
1873 #define Vc_DUMMY_ARG4
1874 #define Vc_DUMMY_ARG5
1875 #endif // Vc_MSVC
1876 
1877 // simd_cast_impl_smaller_input {{{2
1878 // The following function can be implemented without the sizeof...(From) overload.
1879 // However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
1880 // function in two works around the issue.
1881 template <typename Return, std::size_t N, typename T, typename... From>
1882 Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return>
1883 simd_cast_impl_smaller_input(const From &... xs, const T &last)
1884 {
1885  Return r = simd_cast<Return>(xs...);
1886  for (size_t i = 0; i < N; ++i) {
1887  r[i + N * sizeof...(From)] = static_cast<typename Return::EntryType>(last[i]);
1888  }
1889  return r;
1890 }
1891 template <typename Return, std::size_t N, typename T>
1892 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(const T &last)
1893 {
1894  Return r = Return();
1895  for (size_t i = 0; i < N; ++i) {
1896  r[i] = static_cast<typename Return::EntryType>(last[i]);
1897  }
1898  return r;
1899 }
1900 template <typename Return, std::size_t N, typename T, typename... From>
1901 Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
1902  const From &... xs, const T &last)
1903 {
1904  Return r = simd_cast<Return>(xs...);
1905  for (size_t i = N * sizeof...(From); i < Return::Size; ++i) {
1906  r[i] = static_cast<typename Return::EntryType>(last[i - N * sizeof...(From)]);
1907  }
1908  return r;
1909 }
1910 template <typename Return, std::size_t N, typename T>
1911 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(const T &last)
1912 {
1913  Return r = Return();
1914  for (size_t i = 0; i < Return::size(); ++i) {
1915  r[i] = static_cast<typename Return::EntryType>(last[i]);
1916  }
1917  return r;
1918 }
1919 
1920 // simd_cast_without_last (declaration) {{{2
1921 template <typename Return, typename T, typename... From>
1922 Vc_INTRINSIC_L Vc_CONST_L Return
1923  simd_cast_without_last(const From &... xs, const T &) Vc_INTRINSIC_R Vc_CONST_R;
1924 
1925 // are_all_types_equal {{{2
/// Trait that is true iff all types of the (non-empty) pack are the same type.
template <typename... Ts> struct are_all_types_equal;
/// A single type is trivially equal to itself.
template <typename T> struct are_all_types_equal<T> : public std::true_type {
};
/// Two or more types: the first two must match and the rest must match the second.
template <typename First, typename Second, typename... Rest>
struct are_all_types_equal<First, Second, Rest...>
    : public std::integral_constant<bool,
                                    are_all_types_equal<Second, Rest...>::value &&
                                        std::is_same<First, Second>::value> {
};
1937 
1938 // simd_cast_interleaved_argument_order (declarations) {{{2
1958 template <typename Return, typename... Ts>
1959 Vc_INTRINSIC Vc_CONST Return
1960  simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b);
1961 
1962 // simd_cast_with_offset (declarations and one impl) {{{2
1963 // offset == 0 {{{3
1964 template <typename Return, std::size_t offset, typename From, typename... Froms>
1965 Vc_INTRINSIC Vc_CONST
1966  enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
1967  simd_cast_with_offset(const From &x, const Froms &... xs);
1968 // offset > 0 && offset divisible by Return::Size {{{3
1969 template <typename Return, std::size_t offset, typename From>
1970 Vc_INTRINSIC Vc_CONST
1971  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
1972  simd_cast_with_offset(const From &x);
1973 // offset > 0 && offset NOT divisible && Return is non-atomic simd(mask)array {{{3
1974 template <typename Return, std::size_t offset, typename From>
1975 Vc_INTRINSIC Vc_CONST
1976  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1978  !Traits::isAtomicSimdArray<Return>::value) ||
1980  !Traits::isAtomicSimdMaskArray<Return>::value))),
1981  Return>
1982  simd_cast_with_offset(const From &x);
1983 // offset > 0 && offset NOT divisible && Return is atomic simd(mask)array {{{3
1984 template <typename Return, std::size_t offset, typename From>
1985 Vc_INTRINSIC Vc_CONST
1986  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1988  Traits::isAtomicSimdArray<Return>::value) ||
1990  Traits::isAtomicSimdMaskArray<Return>::value))),
1991  Return>
1992  simd_cast_with_offset(const From &x);
1993 // offset > first argument (drops first arg) {{{3
1994 template <typename Return, std::size_t offset, typename From, typename... Froms>
1995 Vc_INTRINSIC Vc_CONST enable_if<
1996  (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
1997  simd_cast_with_offset(const From &, const Froms &... xs)
1998 {
1999  return simd_cast_with_offset<Return, offset - From::Size>(xs...);
2000 }
2001 
2002 // offset > first and only argument (returns Zero) {{{3
2003 template <typename Return, std::size_t offset, typename From>
2004 Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
2005  const From &)
2006 {
2007  return Return(0);
2008 }
2009 
2010 // first_type_of {{{2
/// Helper for first_type_of: exposes the first type of a non-empty pack as `type`.
template <typename First, typename... Rest> struct first_type_of_impl
{
    using type = First;
};
/// The first type of the non-empty pack Ts.
template <typename... Ts>
using first_type_of = typename first_type_of_impl<Ts...>::type;
2016 
2017 // simd_cast_drop_arguments (declarations) {{{2
// Forward declarations; the definitions appear further down in this file. The overloads
// forward to simd_cast<Return> while leaving out trailing arguments that are not needed
// to fill Return. The leading pack `Froms... xs` is not in a deduced position, so callers
// in this file pass the pack explicitly as template arguments.
//
// Single argument: always forwarded.
2018 template <typename Return, typename From>
2019 Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
// All arguments are needed: the arguments before the last one provide fewer than
// Return::Size entries.
2020 template <typename Return, typename... Froms>
2021 Vc_INTRINSIC Vc_CONST
2022  enable_if<(are_all_types_equal<Froms...>::value &&
2023  sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
2024  Return>
2025  simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);
2026 // The following function can be implemented without the sizeof...(From) overload.
2027 // However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
2028 // function in two works around the issue.
// Three or more arguments where everything except the last argument already provides at
// least Return::Size entries: the last (unnamed) argument is dropped.
2029 template <typename Return, typename From, typename... Froms>
2030 Vc_INTRINSIC Vc_CONST enable_if<
2031  (are_all_types_equal<From, Froms...>::value &&
2032  (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
2033  Return>
2034 simd_cast_drop_arguments(Froms... xs, From x, From);
// Exactly two arguments where the first alone provides at least Return::Size entries:
// the second (unnamed) argument is dropped.
2035 template <typename Return, typename From>
2036 Vc_INTRINSIC Vc_CONST
2037  enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
2038  simd_cast_drop_arguments(From x, From);
2039 
// Debug tracing helper called by the simd_cast overloads in this header.
// With Vc_DEBUG_SIMD_CAST defined, vc_debug_ writes `prefix`, the first argument, every
// further argument preceded by ", ", and finally `suffix` to std::cerr. Without the
// macro it is an empty function that ignores all of its arguments.
2040 namespace
2041 {
2042 #ifdef Vc_DEBUG_SIMD_CAST
// Receives (and discards) the addresses of the stream expressions created by the pack
// expansion in vc_debug_; it only exists so that the pack can be expanded in a braced list.
2043 void debugDoNothing(const std::initializer_list<void *> &) {}
2044 template <typename T0, typename... Ts>
2045 inline void vc_debug_(const char *prefix, const char *suffix, const T0 &arg0,
2046  const Ts &... args)
2047 {
2048  std::cerr << prefix << arg0;
2049  debugDoNothing({&(std::cerr << ", " << args)...});
2050  std::cerr << suffix;
2051 }
2052 #else
// Tracing disabled: same signature, no effect.
2053 template <typename T0, typename... Ts>
2054 Vc_INTRINSIC void vc_debug_(const char *, const char *, const T0 &, const Ts &...)
2055 {
2056 }
2057 #endif
2058 } // unnamed namespace
2059 
2060 // is_less trait{{{2
// is_less<A, B>::value is true if and only if A < B (compile-time comparison of two
// size_t constants, exposed as a std::integral_constant<bool, ...>).
2061 template <size_t A, size_t B>
2062 struct is_less : public std::integral_constant<bool, (A < B)> {
2063 };
2064 
2065 // is_power_of_2 trait{{{2
// is_power_of_2<N>::value is true if and only if ((N - 1) & N) == 0, i.e. N has at most
// one bit set. Note that this formula also yields true for N == 0.
2066 template <size_t N>
2067 struct is_power_of_2 : public std::integral_constant<bool, ((N - 1) & N) == 0> {
2068 };
2069 
2070 // simd_cast<T>(xs...) to SimdArray/-mask {{{2
// The macro generates four simd_cast<Return>(NativeType_<T, A> x, Froms... xs) overloads.
// All of them require that every argument has the type NativeType_<T, A> and that A is
// not a fixed-size ABI. They are told apart by the debug tag passed to vc_debug_:
//  {1} Return is an atomic SimdArrayType_ and the arguments after the first provide
//      fewer than Return::Size entries: all arguments are cast to Return::storage_type
//      and the result is wrapped via private_init.
//  {2} Return is an atomic SimdArrayType_ and the arguments after the first provide at
//      least Return::Size entries: forwards to simd_cast_without_last, which ignores the
//      last argument.
//  {3} Return is a non-atomic SimdArrayType_ and the arguments provide more entries than
//      Common::left_size<Return::Size>(): the first member (storage_type0) is built via
//      simd_cast_drop_arguments and the second member (storage_type1) via
//      simd_cast_with_offset starting at entry R0::Size.
//  {4} Return is a non-atomic SimdArrayType_ and the arguments provide at most
//      Common::left_size<Return::Size>() entries: the first member is cast from all
//      arguments and the second member is R1(0).
// The macro is instantiated for (SimdArray, Vc::Vector) and (SimdMaskArray, Vc::Mask) and
// then undefined again.
2071 #define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_) \
2072  template <typename Return, typename T, typename A, typename... Froms> \
2073  Vc_INTRINSIC Vc_CONST enable_if< \
2074  (Traits::isAtomic##SimdArrayType_<Return>::value && \
2075  is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value && \
2076  are_all_types_equal<NativeType_<T, A>, Froms...>::value && \
2077  !detail::is_fixed_size_abi<A>::value), \
2078  Return> \
2079  simd_cast(NativeType_<T, A> x, Froms... xs) \
2080  { \
2081  vc_debug_("simd_cast{1}(", ")\n", x, xs...); \
2082  return {private_init, simd_cast<typename Return::storage_type>(x, xs...)}; \
2083  } \
2084  template <typename Return, typename T, typename A, typename... Froms> \
2085  Vc_INTRINSIC Vc_CONST enable_if< \
2086  (Traits::isAtomic##SimdArrayType_<Return>::value && \
2087  !is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value && \
2088  are_all_types_equal<NativeType_<T, A>, Froms...>::value && \
2089  !detail::is_fixed_size_abi<A>::value), \
2090  Return> \
2091  simd_cast(NativeType_<T, A> x, Froms... xs) \
2092  { \
2093  vc_debug_("simd_cast{2}(", ")\n", x, xs...); \
2094  return {simd_cast_without_last<Return, NativeType_<T, A>, Froms...>(x, xs...)}; \
2095  } \
2096  template <typename Return, typename T, typename A, typename... Froms> \
2097  Vc_INTRINSIC Vc_CONST \
2098  enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2099  !Traits::isAtomic##SimdArrayType_<Return>::value && \
2100  is_less<Common::left_size<Return::Size>(), \
2101  NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value && \
2102  are_all_types_equal<NativeType_<T, A>, Froms...>::value && \
2103  !detail::is_fixed_size_abi<A>::value), \
2104  Return> \
2105  simd_cast(NativeType_<T, A> x, Froms... xs) \
2106  { \
2107  vc_debug_("simd_cast{3}(", ")\n", x, xs...); \
2108  using R0 = typename Return::storage_type0; \
2109  using R1 = typename Return::storage_type1; \
2110  return {simd_cast_drop_arguments<R0, Froms...>(x, xs...), \
2111  simd_cast_with_offset<R1, R0::Size>(x, xs...)}; \
2112  } \
2113  template <typename Return, typename T, typename A, typename... Froms> \
2114  Vc_INTRINSIC Vc_CONST \
2115  enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2116  !Traits::isAtomic##SimdArrayType_<Return>::value && \
2117  !is_less<Common::left_size<Return::Size>(), \
2118  NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value && \
2119  are_all_types_equal<NativeType_<T, A>, Froms...>::value && \
2120  !detail::is_fixed_size_abi<A>::value), \
2121  Return> \
2122  simd_cast(NativeType_<T, A> x, Froms... xs) \
2123  { \
2124  vc_debug_("simd_cast{4}(", ")\n", x, xs...); \
2125  using R0 = typename Return::storage_type0; \
2126  using R1 = typename Return::storage_type1; \
2127  return {simd_cast<R0>(x, xs...), R1(0)}; \
2128  } \
2129  Vc_NOTHING_EXPECTING_SEMICOLON
2130 
2131 Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
2132 Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
2133 #undef Vc_SIMDARRAY_CASTS
2134 
2135 // simd_cast<SimdArray/-mask, offset>(V) {{{2
// The macro generates three simd_cast<Return, offset>(NativeType_<T, A> x) overloads.
// `offset` counts in units of Return::Size: the non-atomic overloads convert it to an
// entry offset with offset * Return::Size.
//  - Return is atomic: forwards to simd_cast<Return::storage_type, offset>(x) and wraps
//    the result via private_init.
//  - Return is non-atomic and Return::Size * offset + Common::left_size<Return::Size>()
//    is smaller than the size of x: both members of Return are extracted from x with
//    simd_cast_with_offset.
//  - Return is non-atomic otherwise: only the first member is extracted from x, the
//    second member is R1(0).
// NOTE(review): Vc_DUMMY_ARG0/1/2 are defined outside this chunk; presumably they add a
// defaulted dummy parameter so that the overloads have distinct signatures - confirm.
2136 #define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_) \
2137  /* SIMD Vector/Mask to atomic SimdArray/simdmaskarray */ \
2138  template <typename Return, int offset, typename T, typename A> \
2139  Vc_INTRINSIC Vc_CONST \
2140  enable_if<Traits::isAtomic##SimdArrayType_<Return>::value, Return> \
2141  simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG0) \
2142  { \
2143  vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x); \
2144  return {private_init, simd_cast<typename Return::storage_type, offset>(x)}; \
2145  } \
2146  /* both halves of Return array are extracted from argument */ \
2147  template <typename Return, int offset, typename T, typename A> \
2148  Vc_INTRINSIC Vc_CONST \
2149  enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2150  !Traits::isAtomic##SimdArrayType_<Return>::value && \
2151  Return::Size * offset + Common::left_size<Return::Size>() < \
2152  NativeType_<T, A>::Size), \
2153  Return> \
2154  simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG1) \
2155  { \
2156  vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x); \
2157  using R0 = typename Return::storage_type0; \
2158  constexpr int entries_offset = offset * Return::Size; \
2159  constexpr int entries_offset_right = entries_offset + R0::Size; \
2160  return { \
2161  simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x), \
2162  simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
2163  x)}; \
2164  } \
2165  /* SIMD Vector/Mask to non-atomic SimdArray/simdmaskarray */ \
2166  /* right half of Return array is zero */ \
2167  template <typename Return, int offset, typename T, typename A> \
2168  Vc_INTRINSIC Vc_CONST \
2169  enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2170  !Traits::isAtomic##SimdArrayType_<Return>::value && \
2171  Return::Size * offset + Common::left_size<Return::Size>() >= \
2172  NativeType_<T, A>::Size), \
2173  Return> \
2174  simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG2) \
2175  { \
2176  vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x); \
2177  using R0 = typename Return::storage_type0; \
2178  using R1 = typename Return::storage_type1; \
2179  constexpr int entries_offset = offset * Return::Size; \
2180  return {simd_cast_with_offset<R0, entries_offset>(x), R1(0)}; \
2181  } \
2182  Vc_NOTHING_EXPECTING_SEMICOLON
2183 
2184 Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
2185 Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
2186 #undef Vc_SIMDARRAY_CASTS
2187 
2188 // simd_cast<T>(xs...) from SimdArray/-mask {{{2
// The macro generates the simd_cast<Return>(x0, xs...) overloads whose arguments are
// SimdArrayType_ objects. The overloads are told apart by the debug tag passed to
// vc_debug_ (N is the array size, M the fourth template argument of the array type):
//  indivisible         N == M; no further arguments, or the further arguments provide
//                      fewer than Return::Size entries: casts from internal_data(...).
//  indivisible2        N == M; the further arguments provide at least Return::Size
//                      entries: casts from internal_data(...) without the last argument.
//  bisectable          N != M, N is a power of 2, the further arguments provide fewer
//                      than Return::Size entries: passes internal_data0 of all arguments
//                      followed by internal_data1 of all arguments to
//                      simd_cast_interleaved_argument_order.
//  bisectable2         N != M, N is a power of 2, the further arguments provide at least
//                      Return::Size entries: forwards without the last argument.
//  remaining           N != M, N is not a power of 2, all arguments together provide at
//                      most Return::Size entries: simd_cast_impl_smaller_input.
//  remaining2          N != M, N is not a power of 2, all arguments together provide
//                      more than Return::Size entries: simd_cast_impl_larger_input.
//  single bisectable   one argument, N != M, N is a power of 2, N >= 2 * Return::Size:
//                      casts from internal_data0(x) only.
//  single bisectable2  one argument, N != M, N is a power of 2,
//                      Return::Size < N < 2 * Return::Size: casts from internal_data0(x)
//                      and internal_data1(x).
// The macro is instantiated for SimdArray and SimdMaskArray and then undefined again.
2189 #define Vc_SIMDARRAY_CASTS(SimdArrayType_) \
2190  /* indivisible SimdArrayType_ */ \
2191  template <typename Return, typename T, std::size_t N, typename V, typename... From> \
2192  Vc_INTRINSIC Vc_CONST \
2193  enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value && \
2194  (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) && \
2195  !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value), \
2196  Return> \
2197  simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs) \
2198  { \
2199  vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...); \
2200  return simd_cast<Return>(internal_data(x0), internal_data(xs)...); \
2201  } \
2202  /* indivisible SimdArrayType_ && can drop arguments from the end */ \
2203  template <typename Return, typename T, std::size_t N, typename V, typename... From> \
2204  Vc_INTRINSIC Vc_CONST \
2205  enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value && \
2206  (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) && \
2207  !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value), \
2208  Return> \
2209  simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs) \
2210  { \
2211  vc_debug_("simd_cast{indivisible2}(", ")\n", x0, xs...); \
2212  return simd_cast_without_last<Return, \
2213  typename SimdArrayType_<T, N, V, N>::storage_type, \
2214  typename From::storage_type...>( \
2215  internal_data(x0), internal_data(xs)...); \
2216  } \
2217  /* bisectable SimdArrayType_ (N = 2^n) && never too large */ \
2218  template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \
2219  typename... From> \
2220  Vc_INTRINSIC Vc_CONST enable_if< \
2221  (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2222  !std::is_same<Return, SimdArrayType_<T, N, V, M>>::value && \
2223  is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
2224  Return> \
2225  simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs) \
2226  { \
2227  vc_debug_("simd_cast{bisectable}(", ")\n", x0, xs...); \
2228  return simd_cast_interleaved_argument_order< \
2229  Return, typename SimdArrayType_<T, N, V, M>::storage_type0, \
2230  typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)..., \
2231  internal_data1(x0), internal_data1(xs)...); \
2232  } \
2233  /* bisectable SimdArrayType_ (N = 2^n) && input so large that at least the last \
2234  * input can be dropped */ \
2235  template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \
2236  typename... From> \
2237  Vc_INTRINSIC Vc_CONST enable_if< \
2238  (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2239  !is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
2240  Return> \
2241  simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs) \
2242  { \
2243  vc_debug_("simd_cast{bisectable2}(", ")\n", x0, xs...); \
2244  return simd_cast_without_last<Return, SimdArrayType_<T, N, V, M>, From...>( \
2245  x0, xs...); \
2246  } \
2247  /* remaining SimdArrayType_ input never larger (N != 2^n) */ \
2248  template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \
2249  typename... From> \
2250  Vc_INTRINSIC Vc_CONST enable_if< \
2251  (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2252  N * (1 + sizeof...(From)) <= Return::Size && !is_power_of_2<N>::value), \
2253  Return> \
2254  simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs) \
2255  { \
2256  vc_debug_("simd_cast{remaining}(", ")\n", x0, xs...); \
2257  return simd_cast_impl_smaller_input<Return, N, SimdArrayType_<T, N, V, M>, \
2258  From...>(x0, xs...); \
2259  } \
2260  /* remaining SimdArrayType_ input larger (N != 2^n) */ \
2261  template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \
2262  typename... From> \
2263  Vc_INTRINSIC Vc_CONST enable_if< \
2264  (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2265  N * (1 + sizeof...(From)) > Return::Size && !is_power_of_2<N>::value), \
2266  Return> \
2267  simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs) \
2268  { \
2269  vc_debug_("simd_cast{remaining2}(", ")\n", x0, xs...); \
2270  return simd_cast_impl_larger_input<Return, N, SimdArrayType_<T, N, V, M>, \
2271  From...>(x0, xs...); \
2272  } \
2273  /* a single bisectable SimdArrayType_ (N = 2^n) too large */ \
2274  template <typename Return, typename T, std::size_t N, typename V, std::size_t M> \
2275  Vc_INTRINSIC Vc_CONST \
2276  enable_if<(N != M && N >= 2 * Return::Size && is_power_of_2<N>::value), Return> \
2277  simd_cast(const SimdArrayType_<T, N, V, M> &x) \
2278  { \
2279  vc_debug_("simd_cast{single bisectable}(", ")\n", x); \
2280  return simd_cast<Return>(internal_data0(x)); \
2281  } \
2282  template <typename Return, typename T, std::size_t N, typename V, std::size_t M> \
2283  Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size && \
2284  N < 2 * Return::Size && is_power_of_2<N>::value), \
2285  Return> \
2286  simd_cast(const SimdArrayType_<T, N, V, M> &x) \
2287  { \
2288  vc_debug_("simd_cast{single bisectable2}(", ")\n", x); \
2289  return simd_cast<Return>(internal_data0(x), internal_data1(x)); \
2290  } \
2291  Vc_NOTHING_EXPECTING_SEMICOLON
2292 
2293 Vc_SIMDARRAY_CASTS(SimdArray);
2294 Vc_SIMDARRAY_CASTS(SimdMaskArray);
2295 #undef Vc_SIMDARRAY_CASTS
// simd_cast from fixed_size_simd<T, N> arguments: every argument is viewed as
// const SimdArray<T, N> & via static_cast and forwarded to simd_cast<Return>.
// The overload is disabled when Return is fixed_size_simd<T, N> itself.
2296 template <class Return, class T, int N, class... Ts,
2297  class = enable_if<!std::is_same<Return, fixed_size_simd<T, N>>::value>>
2298 Vc_INTRINSIC Return simd_cast(const fixed_size_simd<T, N> &x, const Ts &... xs)
2299 {
2300  return simd_cast<Return>(static_cast<const SimdArray<T, N> &>(x),
2301  static_cast<const SimdArray<T, N> &>(xs)...);
2302 }
// simd_cast from fixed_size_simd_mask<T, N> arguments: every argument is viewed as
// const SimdMaskArray<T, N> & via static_cast and forwarded to simd_cast<Return>.
// The overload is disabled when Return is fixed_size_simd_mask<T, N> itself.
2303 template <class Return, class T, int N, class... Ts,
2304  class = enable_if<!std::is_same<Return, fixed_size_simd_mask<T, N>>::value>>
2305 Vc_INTRINSIC Return simd_cast(const fixed_size_simd_mask<T, N> &x, const Ts &... xs)
2306 {
2307  return simd_cast<Return>(static_cast<const SimdMaskArray<T, N> &>(x),
2308  static_cast<const SimdMaskArray<T, N> &>(xs)...);
2309 }
2310 
2311 // simd_cast<T, offset>(SimdArray/-mask) {{{2
// The macro generates the simd_cast<Return, offset>(x) overloads for a SimdArrayType_
// argument. `offset` counts in units of Return::Size (the code multiplies it by
// Return::Size to obtain an entry index). N is the array size, M the fourth template
// argument of the array type. Overloads, by debug tag:
//  offset == 0     identical to simd_cast<Return>(x).
//  forward         array type with M == N and offset != 0: forwards to
//                  simd_cast<Return, offset>(internal_data(x)).
//  right           N != M, the requested entries start at or after
//                  Common::left_size<N>() and left_size is a multiple of Return::Size:
//                  casts from internal_data1(x) with a correspondingly reduced offset.
//  right, nofit    as above, but left_size is not a multiple of Return::Size: uses
//                  simd_cast_with_offset on internal_data1(x) with an entry offset.
//  left            N != M and the requested entries end at or before left_size:
//                  casts from internal_data0(x).
//  copy scalars    N != M and the requested entries start before left_size but end
//                  after it: copies the entries one by one (static_cast to
//                  Return::EntryType) up to entry min(N, (offset + 1) * Return::Size);
//                  all other entries of the result keep the value 0.
// NOTE(review): Vc_DUMMY_ARG0..5 are defined outside this chunk; presumably they add a
// defaulted dummy parameter so that the overloads have distinct signatures - confirm.
2312 #define Vc_SIMDARRAY_CASTS(SimdArrayType_) \
2313  /* offset == 0 is like without offset */ \
2314  template <typename Return, int offset, typename T, std::size_t N, typename V, \
2315  std::size_t M> \
2316  Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast( \
2317  const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG0) \
2318  { \
2319  vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x); \
2320  return simd_cast<Return>(x); \
2321  } \
2322  /* forward to V */ \
2323  template <typename Return, int offset, typename T, std::size_t N, typename V> \
2324  Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast( \
2325  const SimdArrayType_<T, N, V, N> &x Vc_DUMMY_ARG1) \
2326  { \
2327  vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x); \
2328  return simd_cast<Return, offset>(internal_data(x)); \
2329  } \
2330  /* convert from right member of SimdArray */ \
2331  template <typename Return, int offset, typename T, std::size_t N, typename V, \
2332  std::size_t M> \
2333  Vc_INTRINSIC Vc_CONST \
2334  enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() && \
2335  offset != 0 && Common::left_size<N>() % Return::Size == 0), \
2336  Return> \
2337  simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG2) \
2338  { \
2339  vc_debug_("simd_cast{offset, right}(", ")\n", offset, x); \
2340  return simd_cast<Return, offset - Common::left_size<N>() / Return::Size>( \
2341  internal_data1(x)); \
2342  } \
2343  /* same as above except for odd cases where offset * Return::Size doesn't fit the \
2344  * left side of the SimdArray */ \
2345  template <typename Return, int offset, typename T, std::size_t N, typename V, \
2346  std::size_t M> \
2347  Vc_INTRINSIC Vc_CONST \
2348  enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() && \
2349  offset != 0 && Common::left_size<N>() % Return::Size != 0), \
2350  Return> \
2351  simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG3) \
2352  { \
2353  vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x); \
2354  return simd_cast_with_offset<Return, \
2355  offset * Return::Size - Common::left_size<N>()>( \
2356  internal_data1(x)); \
2357  } \
2358  /* convert from left member of SimdArray */ \
2359  template <typename Return, int offset, typename T, std::size_t N, typename V, \
2360  std::size_t M> \
2361  Vc_INTRINSIC Vc_CONST enable_if< \
2362  (N != M && /*offset * Return::Size < Common::left_size<N>() &&*/ \
2363  offset != 0 && (offset + 1) * Return::Size <= Common::left_size<N>()), \
2364  Return> \
2365  simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG4) \
2366  { \
2367  vc_debug_("simd_cast{offset, left}(", ")\n", offset, x); \
2368  return simd_cast<Return, offset>(internal_data0(x)); \
2369  } \
2370  /* fallback to copying scalars */ \
2371  template <typename Return, int offset, typename T, std::size_t N, typename V, \
2372  std::size_t M> \
2373  Vc_INTRINSIC Vc_CONST \
2374  enable_if<(N != M && (offset * Return::Size < Common::left_size<N>()) && \
2375  offset != 0 && (offset + 1) * Return::Size > Common::left_size<N>()), \
2376  Return> \
2377  simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG5) \
2378  { \
2379  vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x); \
2380  using R = typename Return::EntryType; \
2381  Return r = Return(0); \
2382  for (std::size_t i = offset * Return::Size; \
2383  i < std::min(N, (offset + 1) * Return::Size); ++i) { \
2384  r[i - offset * Return::Size] = static_cast<R>(x[i]); \
2385  } \
2386  return r; \
2387  } \
2388  Vc_NOTHING_EXPECTING_SEMICOLON
2389 Vc_SIMDARRAY_CASTS(SimdArray);
2390 Vc_SIMDARRAY_CASTS(SimdMaskArray);
2391 #undef Vc_SIMDARRAY_CASTS
2392 // simd_cast_drop_arguments (definitions) {{{2
// Single argument: nothing can be dropped, forward to simd_cast<Return>.
2393 template <typename Return, typename From>
2394 Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x)
2395 {
2396  return simd_cast<Return>(x);
2397 }
// All arguments are needed (the arguments before the last one provide fewer than
// Return::Size entries): forward every argument to simd_cast<Return>.
2398 template <typename Return, typename... Froms>
2399 Vc_INTRINSIC Vc_CONST
2400  enable_if<(are_all_types_equal<Froms...>::value &&
2401  sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
2402  Return>
2403  simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
2404 {
2405  return simd_cast<Return>(xs..., x);
2406 }
2407 // The following function can be implemented without the sizeof...(From) overload.
2408 // However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
2409 // function in two works around the issue.
// Three or more arguments, and everything except the last one already provides at least
// Return::Size entries: the last (unnamed) argument is dropped and the remaining
// arguments are examined again.
2410 template <typename Return, typename From, typename... Froms>
2411 Vc_INTRINSIC Vc_CONST enable_if<
2412  (are_all_types_equal<From, Froms...>::value &&
2413  (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
2414  Return>
2415 simd_cast_drop_arguments(Froms... xs, From x, From)
2416 {
2417  return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
2418 }
// Exactly two arguments, and the first one alone provides at least Return::Size entries:
// the second (unnamed) argument is dropped.
2419 template <typename Return, typename From>
2420 Vc_INTRINSIC Vc_CONST
2421  enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
2422  simd_cast_drop_arguments(From x, From)
2423 {
2424  return simd_cast_drop_arguments<Return>(x);
2425 }
2426 
2427 // simd_cast_with_offset (definitions) {{{2
// `offset` lies inside x (0 < offset < From::Size) and is a multiple of Return::Size:
// the entry offset is converted into a chunk index (offset / Return::Size) and the
// call is forwarded to simd_cast<Return, index>(x).
2428  template <typename Return, std::size_t offset, typename From>
2429  Vc_INTRINSIC Vc_CONST
2430  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0),
2431  Return> simd_cast_with_offset(const From &x)
2432 {
2433  return simd_cast<Return, offset / Return::Size>(x);
2434 }
// `offset` lies inside x (0 < offset < From::Size), is not a multiple of Return::Size,
// and Return is a non-atomic SimdArray or SimdMaskArray: the two members of Return are
// produced separately, the first one starting at entry `offset` of x and the second one
// starting at entry `offset + R0::Size`.
2435 template <typename Return, std::size_t offset, typename From>
2436 Vc_INTRINSIC Vc_CONST
2437  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
2438  ((Traits::isSimdArray<Return>::value &&
2439  !Traits::isAtomicSimdArray<Return>::value) ||
2440  (Traits::isSimdMaskArray<Return>::value &&
2441  !Traits::isAtomicSimdMaskArray<Return>::value))),
2442  Return>
2443  simd_cast_with_offset(const From &x)
2444 {
2445  using R0 = typename Return::storage_type0;
2446  using R1 = typename Return::storage_type1;
2447  return {simd_cast_with_offset<R0, offset>(x),
2448  simd_cast_with_offset<R1, offset + R0::Size>(x)};
2449 }
// `offset` lies inside x (0 < offset < From::Size), is not a multiple of Return::Size,
// and Return is an atomic SimdArray or SimdMaskArray: x is first shifted by the
// remainder (offset % Return::Size) so that the remaining offset is a whole number of
// Return-sized chunks, then the chunk with index offset / Return::Size is cast.
2450 template <typename Return, std::size_t offset, typename From>
2451 Vc_INTRINSIC Vc_CONST
2452  enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
2453  ((Traits::isSimdArray<Return>::value &&
2454  Traits::isAtomicSimdArray<Return>::value) ||
2455  (Traits::isSimdMaskArray<Return>::value &&
2456  Traits::isAtomicSimdMaskArray<Return>::value))),
2457  Return>
2458  simd_cast_with_offset(const From &x)
2459 {
2460  return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
2461 }
// offset == 0 with any number of equally typed arguments: identical to a plain
// simd_cast<Return> of all arguments.
2462 template <typename Return, std::size_t offset, typename From, typename... Froms>
2463 Vc_INTRINSIC Vc_CONST
2464  enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
2465  simd_cast_with_offset(const From &x, const Froms &... xs)
2466 {
2467  return simd_cast<Return>(x, xs...);
2468 }
2469 
2470 // simd_cast_without_last (definition) {{{2
// Forwards all arguments except the last one to simd_cast<Return>; the last argument
// (of type T) is ignored. The pack `From...` precedes the last parameter and therefore
// is not deduced: callers in this file pass T and From... explicitly.
2471 template <typename Return, typename T, typename... From>
2472 Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(const From &... xs, const T &)
2473 {
2474  return simd_cast<Return>(xs...);
2475 }
2476 
2477 // simd_cast_interleaved_argument_order (definitions) {{{2
2478 
2479 #ifdef Vc_MSVC
2480 // MSVC doesn't see that the Ts pack below can be empty and thus complains when extract_interleaved
2481 // is called with only 2 arguments. These overloads here are *INCORRECT standard C++*, but they make
2482 // MSVC do the right thing.
// Two-argument case, index 0: returns the first argument.
2483 template <std::size_t I, typename T0>
2484 Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0, const T0 &)
2485 {
2486  return a0;
2487 }
// Two-argument case, index 1: returns the second argument.
2488 template <std::size_t I, typename T0>
2489 Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &, const T0 &b0)
2490 {
2491  return b0;
2492 }
2493 #endif // Vc_MSVC
2494 
// extract_interleaved<I>(a0, a..., b0, b...) selects the I-th element of the interleaved
// sequence a0, b0, a1, b1, ... from an argument list that holds all a values followed by
// all b values.
// I == 0: the first element of the first half (a0).
2496 template <std::size_t I, typename T0, typename... Ts>
2497 Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0,
2498  const Ts &...,
2499  const T0 &,
2500  const Ts &...)
2501 {
2502  return a0;
2503 }
// I == 1: the first element of the second half (b0).
2505 template <std::size_t I, typename T0, typename... Ts>
2506 Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &,
2507  const Ts &...,
2508  const T0 &b0,
2509  const Ts &...)
2510 {
2511  return b0;
2512 }
// I > 1: drops the first element of each half and recurses with I - 2.
2514 template <std::size_t I, typename T0, typename... Ts>
2515 Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(const T0 &,
2516  const Ts &... a,
2517  const T0 &,
2518  const Ts &... b)
2519 {
2520  return extract_interleaved<I - 2, Ts...>(a..., b...);
2521 }
// Helper of simd_cast_interleaved_argument_order: expands the index sequence so that
// simd_cast<Return> receives extract_interleaved<Indexes>(a..., b...) for every index,
// in index order.
2523 template <typename Return, typename... Ts, std::size_t... Indexes>
2524 Vc_INTRINSIC Vc_CONST Return
2525  simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>, const Ts &... a,
2526  const Ts &... b)
2527 {
2528  return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
2529 }
// Takes two equally long argument lists (a..., b...) and calls simd_cast<Return> with the
// arguments reordered to a0, b0, a1, b1, ... The reordering is done by generating the
// indexes 0 .. 2 * sizeof...(Ts) - 1 and delegating to the _1 helper.
2532 template <typename Return, typename... Ts>
2533 Vc_INTRINSIC Vc_CONST Return
2534  simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b)
2535 {
2536  using seq = make_index_sequence<sizeof...(Ts)*2>;
2537  return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...);
2538 }
2539 
2540 // conditional_assign {{{1
// Generates, for every listed Operator enumerator, a conditional_assign<O>(lhs, mask, rhs)
// overload for SimdArray that is enabled only for that enumerator and applies the
// corresponding (compound) assignment operator to the masked expression lhs(mask).
2541 #define Vc_CONDITIONAL_ASSIGN(name_, op_) \
2542  template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M, \
2543  typename U> \
2544  Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign( \
2545  SimdArray<T, N, V, VN> &lhs, M &&mask, U &&rhs) \
2546  { \
2547  lhs(mask) op_ rhs; \
2548  } \
2549  Vc_NOTHING_EXPECTING_SEMICOLON
2550 Vc_CONDITIONAL_ASSIGN( Assign, =);
2551 Vc_CONDITIONAL_ASSIGN( PlusAssign, +=);
2552 Vc_CONDITIONAL_ASSIGN( MinusAssign, -=);
2553 Vc_CONDITIONAL_ASSIGN( MultiplyAssign, *=);
2554 Vc_CONDITIONAL_ASSIGN( DivideAssign, /=);
2555 Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
2556 Vc_CONDITIONAL_ASSIGN( XorAssign, ^=);
2557 Vc_CONDITIONAL_ASSIGN( AndAssign, &=);
2558 Vc_CONDITIONAL_ASSIGN( OrAssign, |=);
2559 Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
2560 Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
2561 #undef Vc_CONDITIONAL_ASSIGN
2562 
// Generates, for the four increment/decrement Operator enumerators, a
// conditional_assign<O>(lhs, mask) overload for SimdArray that evaluates the given
// expression on lhs(mask) and returns its result as a SimdArray<T, N, V, VN>.
2563 #define Vc_CONDITIONAL_ASSIGN(name_, expr_) \
2564  template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M> \
2565  Vc_INTRINSIC enable_if<O == Operator::name_, SimdArray<T, N, V, VN>> \
2566  conditional_assign(SimdArray<T, N, V, VN> &lhs, M &&mask) \
2567  { \
2568  return expr_; \
2569  } \
2570  Vc_NOTHING_EXPECTING_SEMICOLON
2571 Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
2572 Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
2573 Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
2574 Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
2575 #undef Vc_CONDITIONAL_ASSIGN
2576 // transpose_impl {{{1
2577 namespace Common
2578 {
// 4x4 transpose for SimdArrays whose fourth template argument equals N: collects
// pointers to the internal_data of the four outputs in r and forwards to the
// transpose_impl overload for V, passing the internal_data of the four inputs.
2579 template <typename T, size_t N, typename V>
2580 inline void transpose_impl(
2581  TransposeTag<4, 4>, SimdArray<T, N, V, N> *Vc_RESTRICT r[],
2582  const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
2583  SimdArray<T, N, V, N>, SimdArray<T, N, V, N>> &proxy)
2584 {
2585  V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
2586  &internal_data(*r[2]), &internal_data(*r[3])};
2587  transpose_impl(TransposeTag<4, 4>(), &r2[0],
2588  TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
2589  internal_data(std::get<1>(proxy.in)),
2590  internal_data(std::get<2>(proxy.in)),
2591  internal_data(std::get<3>(proxy.in))});
2592 }
2593 
// Transposes four SimdArray<T, 2, V, 1> inputs into two SimdArray<T, 4, V, 1> outputs:
// the four nested members of *r[0] receive internal_data0 of inputs 0..3, and the four
// nested members of *r[1] receive internal_data1 of inputs 0..3.
2594 template <typename T, typename V>
2595 inline void transpose_impl(
2596  TransposeTag<2, 4>, SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
2597  const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
2598  SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>> &proxy)
2599 {
2600  auto &lo = *r[0];
2601  auto &hi = *r[1];
2602  internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
2603  internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
2604  internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
2605  internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
2606  internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
2607  internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
2608  internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
2609  internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
2610 }
2611 
// 4x4 transpose for SimdArray<T, 1, V, 1>: same implementation as the overload for
// SimdArray<T, N, V, N>, i.e. it forwards the internal_data of outputs and inputs to the
// transpose_impl overload for V.
// NOTE(review): presumably this overload exists to disambiguate N == 1 between the
// <T, N, V, N> and <T, N, V, 1> overloads - confirm.
2612 template <typename T, typename V>
2613 inline void transpose_impl(
2614  TransposeTag<4, 4>, SimdArray<T, 1, V, 1> *Vc_RESTRICT r[],
2615  const TransposeProxy<SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>,
2616  SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>> &proxy)
2617 {
2618  V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
2619  &internal_data(*r[2]), &internal_data(*r[3])};
2620  transpose_impl(TransposeTag<4, 4>(), &r2[0],
2621  TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
2622  internal_data(std::get<1>(proxy.in)),
2623  internal_data(std::get<2>(proxy.in)),
2624  internal_data(std::get<3>(proxy.in))});
2625 }
2626 
// 4x4 transpose for SimdArray<T, N, V, 1>: splits the work into two TransposeTag<2, 4>
// transposes. Outputs r[0], r[1] are filled from internal_data0 of the four inputs and
// outputs r[2], r[3] from internal_data1 of the four inputs.
// NOTE(review): H is hard-coded to SimdArray<T, 2> and the TransposeTag<2, 4> overload
// in this file takes SimdArray<T, 4, V, 1> outputs, so this looks usable only for
// N == 4 - confirm.
2627 template <typename T, size_t N, typename V>
2628 inline void transpose_impl(
2629  TransposeTag<4, 4>, SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
2630  const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
2631  SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>> &proxy)
2632 {
2633  SimdArray<T, N, V, 1> *Vc_RESTRICT r0[4 / 2] = {r[0], r[1]};
2634  SimdArray<T, N, V, 1> *Vc_RESTRICT r1[4 / 2] = {r[2], r[3]};
2635  using H = SimdArray<T, 2>;
2636  transpose_impl(TransposeTag<2, 4>(), &r0[0],
2637  TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
2638  internal_data0(std::get<1>(proxy.in)),
2639  internal_data0(std::get<2>(proxy.in)),
2640  internal_data0(std::get<3>(proxy.in))});
2641  transpose_impl(TransposeTag<2, 4>(), &r1[0],
2642  TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
2643  internal_data1(std::get<1>(proxy.in)),
2644  internal_data1(std::get<2>(proxy.in)),
2645  internal_data1(std::get<3>(proxy.in))});
2646 }
2647 
2648 /* TODO:
2649 template <typename T, std::size_t N, typename V, std::size_t VSize>
2650 inline enable_if<(N > VSize), void> transpose_impl(
2651  std::array<SimdArray<T, N, V, VSize> * Vc_RESTRICT, 4> & r,
2652  const TransposeProxy<SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>,
2653  SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>> &proxy)
2654 {
2655  typedef SimdArray<T, N, V, VSize> SA;
2656  std::array<typename SA::storage_type0 * Vc_RESTRICT, 4> r0 = {
2657  {&internal_data0(*r[0]), &internal_data0(*r[1]), &internal_data0(*r[2]),
2658  &internal_data0(*r[3])}};
2659  transpose_impl(
2660  r0, TransposeProxy<typename SA::storage_type0, typename SA::storage_type0,
2661  typename SA::storage_type0, typename SA::storage_type0>{
2662  internal_data0(std::get<0>(proxy.in)),
2663  internal_data0(std::get<1>(proxy.in)),
2664  internal_data0(std::get<2>(proxy.in)),
2665  internal_data0(std::get<3>(proxy.in))});
2666 
2667  std::array<typename SA::storage_type1 * Vc_RESTRICT, 4> r1 = {
2668  {&internal_data1(*r[0]), &internal_data1(*r[1]), &internal_data1(*r[2]),
2669  &internal_data1(*r[3])}};
2670  transpose_impl(
2671  r1, TransposeProxy<typename SA::storage_type1, typename SA::storage_type1,
2672  typename SA::storage_type1, typename SA::storage_type1>{
2673  internal_data1(std::get<0>(proxy.in)),
2674  internal_data1(std::get<1>(proxy.in)),
2675  internal_data1(std::get<2>(proxy.in)),
2676  internal_data1(std::get<3>(proxy.in))});
2677 }
2678 */
2679 } // namespace Common
2680 
2681 // }}}1
2682 namespace Detail
2683 {
2684 // InterleaveImpl for SimdArrays {{{
2685 // atomic {{{1
// InterleaveImpl specialization for SimdArrays whose fourth template argument equals N:
// both operations unwrap every vector argument with internal_data and delegate to
// InterleaveImpl<V, N, VSizeof>, passing `data` and the index object `i` through unchanged.
2686 template <class T, size_t N, class V, size_t VSizeof>
2687 struct InterleaveImpl<SimdArray<T, N, V, N>, N, VSizeof> {
2688  template <class I, class... VV>
2689  static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv)
2690  {
2691  InterleaveImpl<V, N, VSizeof>::interleave(data, i, internal_data(vv)...);
2692  }
2693  template <class I, class... VV>
2694  static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... vv)
2695  {
2696  InterleaveImpl<V, N, VSizeof>::deinterleave(data, i, internal_data(vv)...);
2697  }
2698 };
2699 
2700 // generic (TODO) {{{1
2701 /*
2702 template <class T, size_t N, class V, size_t Wt, size_t VSizeof>
2703 struct InterleaveImpl<SimdArray<T, N, V, Wt>, N, VSizeof> {
2704  using SA = SimdArray<T, N, V, Wt>;
2705  using SA0 = typename SA::storage_type0;
2706  using SA1 = typename SA::storage_type1;
2707 
2708  template <class I, class... VV>
2709  static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv)
2710  {
2711  InterleaveImpl<SA0, SA0::size(), sizeof(SA0)>::interleave(
2712  data, i, // i needs to be split
2713  internal_data0(vv)...);
2714  InterleaveImpl<SA1, SA1::size(), sizeof(SA1)>::interleave(
2715  data, // how far to advance data?
2716  i, // i needs to be split
2717  internal_data1(vv)...);
2718  }
2719  template <class I, class... VV>
2720  static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... vv)
2721  {
2722  InterleaveImpl<V, N, VSizeof>::deinterleave(data, i, internal_data(vv)...);
2723  }
2724 };
2725 */
2726 } // namespace Detail
2727 // }}}
2729 
2730 } // namespace Vc_VERSIONED_NAMESPACE
2731 
2732 // numeric_limits {{{1
2733 namespace std
2734 {
// std::numeric_limits for Vc::SimdArray: inherits all members from numeric_limits<T> and
// replaces the value-returning functions listed below so that they return a SimdArray
// (R) constructed from the corresponding scalar numeric_limits<T> value.
2735 template <typename T, size_t N, typename V, size_t VN>
2736 struct numeric_limits<Vc::SimdArray<T, N, V, VN>> : public numeric_limits<T> {
2737 private:
2738  using R = Vc::SimdArray<T, N, V, VN>;
2739 
2740 public:
2741  static Vc_ALWAYS_INLINE Vc_CONST R max() noexcept { return numeric_limits<T>::max(); }
2742  static Vc_ALWAYS_INLINE Vc_CONST R min() noexcept { return numeric_limits<T>::min(); }
2743  static Vc_ALWAYS_INLINE Vc_CONST R lowest() noexcept
2744  {
2745  return numeric_limits<T>::lowest();
2746  }
2747  static Vc_ALWAYS_INLINE Vc_CONST R epsilon() noexcept
2748  {
2749  return numeric_limits<T>::epsilon();
2750  }
2751  static Vc_ALWAYS_INLINE Vc_CONST R round_error() noexcept
2752  {
2753  return numeric_limits<T>::round_error();
2754  }
2755  static Vc_ALWAYS_INLINE Vc_CONST R infinity() noexcept
2756  {
2757  return numeric_limits<T>::infinity();
2758  }
2759  static Vc_ALWAYS_INLINE Vc_CONST R quiet_NaN() noexcept
2760  {
2761  return numeric_limits<T>::quiet_NaN();
2762  }
2763  static Vc_ALWAYS_INLINE Vc_CONST R signaling_NaN() noexcept
2764  {
2765  return numeric_limits<T>::signaling_NaN();
2766  }
2767  static Vc_ALWAYS_INLINE Vc_CONST R denorm_min() noexcept
2768  {
2769  return numeric_limits<T>::denorm_min();
2770  }
2771 };
2772 } // namespace std
2773 //}}}1
2774 
2775 #endif // VC_COMMON_SIMDARRAY_H_
2776 
2777 // vim: foldmethod=marker
value_type operator[](size_t index) const noexcept
This operator can be used to read scalar entries of the vector.
Definition: simdarray.h:1041
The main vector class for expressing data parallelism.
Definition: fwddecl.h:53
constexpr VectorSpecialInitializerIndexesFromZero IndexesFromZero
The special object Vc::IndexesFromZero can be used to construct Vector objects initialized to values ...
Definition: types.h:91
fixed_size_simd< T, N > max(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::max function component-wise and concurrently.
Definition: simdarray.h:1854
Vc::Vector< T > min(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
Vector apply(F &&f) const
Call f on every entry of the vector and return the results as a new vector.
std::ostream & operator<<(std::ostream &out, const Vc::Vector< T, Abi > &v)
Prints the contents of a vector into a stream object.
Definition: IO:117
UnalignedTag DefaultLoadTag
The default load tag type uses unaligned (non-streaming) loads.
static fixed_size_simd< T, N > Zero()
Returns a vector with the entries initialized to zero.
Definition: simdarray.h:700
result_vector_type< L, R > operator-(L &&lhs, R &&rhs)
Applies - component-wise and concurrently.
Definition: simdarray.h:1721
fixed_size_simd< T, N > copysign(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::copysign function component-wise and concurrently.
Definition: simdarray.h:1810
Vc::Vector< T > max(const Vc::Vector< T > &x, const Vc::Vector< T > &y)
Definition: vector.h:248
static fixed_size_simd< T, N > IndexesFromZero()
Returns a vector with the entries initialized to 0, 1, 2, 3, 4, 5, ...
Definition: simdarray.h:712
Identifies any possible SimdArray<T, N> type (independent of const/volatile or reference) ...
Definition: type_traits.h:137
fixed_size_simd< T, N > atan2(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::atan2 function component-wise and concurrently.
Definition: simdarray.h:1808
fixed_size_simd< T, N > min(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::min function component-wise and concurrently.
Definition: simdarray.h:1853
Identifies any possible SimdMaskArray<T, N> type (independent of const/volatile or reference) ...
Definition: type_traits.h:143
Data-parallel arithmetic type with user-defined number of elements.
Definition: fwddecl.h:82
The value member will either be the number of SIMD vector entries or 0 if T is not a SIMD type...
Definition: type_traits.h:162
fixed_size_simd< T, N > rotated(int amount) const
Rotate vector entries to the left by amount.
Definition: simdarray.h:1255
Vector reversed() const
Returns a vector with all components reversed.
fixed_size_simd< T, N > reversed() const
Returns a vector with all components reversed.
Definition: simdarray.h:1338
Data-parallel mask type with user-defined number of boolean elements.
Definition: fwddecl.h:86
Vector sorted() const
Return a sorted copy of the vector.
Vector rotated(int amount) const
Rotate vector entries to the left by amount.
Vector shifted(int amount) const
Shift vector entries to the left by amount; shifting in zeros.
void assign(SimdizeDetail::Adapter< S, T, N > &a, size_t i, const S &x)
Assigns one scalar object x to a SIMD slot at offset i in the simdized object a.
Definition: simdize.h:1221
fixed_size_simd< T, N > sorted() const
Return a sorted copy of the vector.
Definition: simdarray.h:1360
fixed_size_simd_mask< T, N > isnegative(const SimdArray< T, N, V, M > &x)
Applies the std:: isnegative function component-wise and concurrently.
Definition: simdarray.h:1825
Identifies any SIMD vector type (independent of implementation or whether it's SimdArray<T, N>).
Definition: type_traits.h:128
Common::WriteMaskedVector< SimdArray, mask_type > operator()(const mask_type &mask)
Writemask the vector before an assignment.
Definition: simdarray.h:1049
static fixed_size_simd< T, N > Random()
Returns a vector with pseudo-random entries.
Definition: simdarray.h:718
Vector partialSum() const
Returns a vector containing the sum of all entries with smaller index.
result_vector_type< L, R > operator+(L &&lhs, R &&rhs)
Applies + component-wise and concurrently.
Definition: simdarray.h:1721
static fixed_size_simd< T, N > One()
Returns a vector with the entries initialized to one.
Definition: simdarray.h:706
fixed_size_simd< T, N > apply(F &&f) const
Call f on every entry of the vector and return the results as a new vector.
Definition: simdarray.h:1113
fixed_size_simd< T, N > apply(F &&f, const mask_type &k) const
As above, but skip the entries where mask is not set.
Definition: simdarray.h:1119
static constexpr std::size_t size()
Returns N, the number of scalar components in an object of this type.
Definition: simdarray.h:674
value_type EntryType
The type of the elements (i.e. T)
Definition: simdarray.h:683
void deinterleave(V *a, V *b, const M *memory, A align)
Definition: deinterleave.h:76
constexpr AlignedTag Aligned
Use this object for a flags parameter to request aligned loads and stores.
void gather(const MT *mem, const IT &indexes)
Gather function.
Definition: simdarray.h:179
SimdArray(value_type a)
Broadcast Constructor.
Definition: simdarray.h:754
The main SIMD mask class.
Definition: fwddecl.h:52
void load(const EntryType *mem)
Load the vector entries from mem, overwriting the previous values.
Definition: vector.h:73
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/false.
Definition: types.h:81
Adapter< S, T, N > shifted(const Adapter< S, T, N > &a, int shift)
Returns a new vectorized object where each entry is shifted by shift.
Definition: simdize.h:1069
T value_type
The type of the elements (i.e. T)
Definition: simdarray.h:656
SimdArray< T, N > frexp(const SimdArray< T, N > &x, SimdArray< int, N > *e)
Applies the std::frexp function component-wise and concurrently.
Definition: simdarray.h:1828
Vector Classes Namespace.
Definition: dox.h:584
constexpr VectorSpecialInitializerOne One
The special object Vc::One can be used to construct Vector and Mask objects initialized to one/true.
Definition: types.h:86
void scatter(MT *mem, IT &&indexes) const
Scatter function.
Definition: simdarray.h:99
std::pair< V, V > interleave(const V &a, const V &b)
Interleaves the entries from a and b into two vectors of the same type.
Definition: interleave.h:55
constexpr std::size_t MemoryAlignment
Specifies the most conservative memory alignment necessary for aligned loads and stores of Vector types.
Definition: vector.h:215
reference operator[](size_t i) noexcept
This operator can be used to modify scalar entries of the vector.
Definition: simdarray.h:1034
fixed_size_simd< T, N > shifted(int amount) const
Shift vector entries to the left by amount; shifting in zeros.
Definition: simdarray.h:1126
void sincos(const SimdArray< T, N > &x, SimdArray< T, N > *sin, SimdArray< T, N > *cos)
Determines sine and cosine concurrently and component-wise on x.
Definition: simdarray.h:1847
SimdArray< T, N > fma(const SimdArray< T, N > &a, const SimdArray< T, N > &b, const SimdArray< T, N > &c)
Applies the std::fma function component-wise and concurrently.
Definition: simdarray.h:1817
SimdArray< T, N > ldexp(const SimdArray< T, N > &x, const SimdArray< int, N > &e)
Applies the std::ldexp function component-wise and concurrently.
Definition: simdarray.h:1834
fixed_size_simd< T, N > exponent(const SimdArray< T, N, V, M > &x)
Applies the std:: exponent function component-wise and concurrently.
Definition: simdarray.h:1813
static fixed_size_simd< T, N > generate(const G &gen)
Generate a vector object from return values of gen (static variant of fill).
Definition: simdarray.h:731
fixed_size_simd< T, N > operator+() const
Returns a copy of itself.
Definition: simdarray.h:959
constexpr UnalignedTag Unaligned
Use this object for a flags parameter to request unaligned loads and stores.