Vc  1.4.3
SIMD Vector Classes for C++
simdarray.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 #ifndef VC_COMMON_SIMDARRAY_H_
29 #define VC_COMMON_SIMDARRAY_H_
30 
31 //#define Vc_DEBUG_SIMD_CAST 1
32 //#define Vc_DEBUG_SORTED 1
33 //#include "../IO"
34 
35 #include <array>
36 #include <limits>
37 
38 #include "writemaskedvector.h"
39 #include "simdarrayhelper.h"
40 #include "simdmaskarray.h"
41 #include "utility.h"
42 #include "interleave.h"
43 #include "indexsequence.h"
44 #include "transpose.h"
45 #include "macros.h"
46 
47 namespace Vc_VERSIONED_NAMESPACE
48 {
49 // select_best_vector_type {{{
50 namespace Common
51 {
54 
58 template <std::size_t N, class... Candidates> struct select_best_vector_type_impl;
59 // last candidate; this one must work; assume it does:
60 template <std::size_t N, class T> struct select_best_vector_type_impl<N, T> {
61  using type = T;
62 };
63 // check the next candidate; use it if N >= T::size(); recurse otherwise:
64 template <std::size_t N, class T, class... Candidates>
65 struct select_best_vector_type_impl<N, T, Candidates...> {
66  using type = typename std::conditional<
67  (N < T::Size), typename select_best_vector_type_impl<N, Candidates...>::type,
68  T>::type;
69 };
// Selects the widest available native vector type for T that does not exceed N
// lanes. The candidate list depends on the compiled-in implementation
// (AVX2 > AVX > SSE), ordered widest-first; Scalar::Vector is always the last
// resort, so the recursion in select_best_vector_type_impl always terminates.
template <class T, std::size_t N>
struct select_best_vector_type : select_best_vector_type_impl<N,
#ifdef Vc_IMPL_AVX2
                                     Vc::AVX2::Vector<T>,
#elif defined Vc_IMPL_AVX
                                     Vc::AVX::Vector<T>,
#endif
#ifdef Vc_IMPL_SSE
                                     Vc::SSE::Vector<T>,
#endif
                                     Vc::Scalar::Vector<T>> {
};
83 } // namespace Common
84 // }}}
85 // internal namespace (product & sum helper) {{{1
86 namespace internal
87 {
88 template <typename T> T Vc_INTRINSIC Vc_PURE product_helper_(const T &l, const T &r) { return l * r; }
89 template <typename T> T Vc_INTRINSIC Vc_PURE sum_helper_(const T &l, const T &r) { return l + r; }
90 } // namespace internal
91 
// min & max declarations {{{1
// Forward declarations of the element-wise min/max overloads for SimdArray;
// the reduction macros below refer to Vc::min/Vc::max, so the names must be
// visible before the class definitions.
template <typename T, std::size_t N, typename V, std::size_t M>
inline fixed_size_simd<T, N> min(const SimdArray<T, N, V, M> &x,
                                 const SimdArray<T, N, V, M> &y);
template <typename T, std::size_t N, typename V, std::size_t M>
inline fixed_size_simd<T, N> max(const SimdArray<T, N, V, M> &x,
                                 const SimdArray<T, N, V, M> &y);
99 
100 // SimdArray class {{{1
103 
104 // atomic SimdArray {{{1
#define Vc_CURRENT_CLASS_NAME SimdArray
// Atomic SimdArray: the specialization chosen when a single native SIMD vector
// covers all N values (fourth template parameter == N). Every operation simply
// forwards to the one wrapped vector in `data`.
template <typename T, std::size_t N, typename VectorType_>
class SimdArray<T, N, VectorType_, N>
{
    static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
                      std::is_same<T, int32_t>::value ||
                      std::is_same<T, uint32_t>::value ||
                      std::is_same<T, int16_t>::value ||
                      std::is_same<T, uint16_t>::value,
                  "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, "
                  "int16_t, uint16_t }");
    static_assert(
        std::is_same<VectorType_,
                     typename Common::select_best_vector_type<T, N>::type>::value &&
            VectorType_::size() == N,
        "ERROR: leave the third and fourth template parameters with their defaults. They "
        "are implementation details.");

public:
    static constexpr bool is_atomic = true;
    using VectorType = VectorType_;
    using vector_type = VectorType;
    using storage_type = vector_type;
    using vectorentry_type = typename vector_type::VectorEntryType;
    using value_type = T;
    using mask_type = fixed_size_simd_mask<T, N>;
    using index_type = fixed_size_simd<int, N>;
    static constexpr std::size_t size() { return N; }
    using Mask = mask_type;
    using MaskType = Mask;
    using MaskArgument = const MaskType &;
    using VectorEntryType = vectorentry_type;
    using EntryType = value_type;
    using IndexType = index_type;
    using AsArg = const SimdArray &;
    using reference = Detail::ElementReference<SimdArray>;
    static constexpr std::size_t Size = size();
    static constexpr std::size_t MemoryAlignment = storage_type::MemoryAlignment;

    // zero init
    Vc_INTRINSIC SimdArray() = default;

    // default copy ctor/operator
    Vc_INTRINSIC SimdArray(const SimdArray &) = default;
    Vc_INTRINSIC SimdArray(SimdArray &&) = default;
    Vc_INTRINSIC SimdArray &operator=(const SimdArray &) = default;

    // broadcast (the non-const lvalue overload keeps overload resolution away
    // from the generator constructor below)
    Vc_INTRINSIC SimdArray(const value_type &a) : data(a) {}
    Vc_INTRINSIC SimdArray(value_type &a) : data(a) {}
    Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {}
    // broadcast from `int` when value_type is not int (e.g. SimdArray<float, N>(1))
    template <
        typename U,
        typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
    Vc_INTRINSIC SimdArray(U a)
        : SimdArray(static_cast<value_type>(a))
    {
    }

    // implicit casts
    // ... from a SimdArray stored in a single vector
    template <class U, class V, class = enable_if<N == V::Size>>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(x)))
    {
    }
    // ... from a SimdArray split into two pieces
    template <class U, class V, class = enable_if<(N > V::Size && N <= 2 * V::Size)>,
              class = U>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(internal_data0(x)),
                                      internal_data(internal_data1(x))))
    {
    }
    // ... from a SimdArray split into four pieces
    template <class U, class V, class = enable_if<(N > 2 * V::Size && N <= 4 * V::Size)>,
              class = U, class = U>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))),
                                      internal_data(internal_data1(internal_data0(x))),
                                      internal_data(internal_data0(internal_data1(x))),
                                      internal_data(internal_data1(internal_data1(x)))))
    {
    }

    // construct from the Index-th piece of a segmented vector
    template <typename V, std::size_t Pieces, std::size_t Index>
    Vc_INTRINSIC SimdArray(Common::Segment<V, Pieces, Index> &&x)
        : data(simd_cast<vector_type, Index>(x.data))
    {
    }

    // initializer-list construction; loads unaligned from the list's buffer
    Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
        : data(init.begin(), Vc::Unaligned)
    {
        Vc_ASSERT(init.size() == size());
    }

    // implicit conversion from underlying vector_type
    template <
        typename V,
        typename = enable_if<Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value>>
    Vc_INTRINSIC SimdArray(const V &x)
        : data(simd_cast<vector_type>(x))
    {
    }

    // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and
    // T implicitly convertible to U
    template <typename U, typename A,
              typename =
                  enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
                            !std::is_same<A, simd_abi::fixed_size<N>>::value>>
    Vc_INTRINSIC operator Vector<U, A>() const
    {
        return simd_cast<Vector<U, A>>(data);
    }
    // reinterpretation as the fixed_size_simd subclass; relies on
    // fixed_size_simd<T, N> deriving from this class without adding state
    operator fixed_size_simd<T, N> &()
    {
        return static_cast<fixed_size_simd<T, N> &>(*this);
    }
    operator const fixed_size_simd<T, N> &() const
    {
        return static_cast<const fixed_size_simd<T, N> &>(*this);
    }

// gather/scatter member functions are textually shared with Vector via these headers
#include "gatherinterface.h"
#include "scatterinterface.h"

    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data() {}
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data(o) {}
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i) : data(i)
    {
    }
    // IndexesFromZero with a compile-time offset: broadcast-add after init
    template <std::size_t Offset>
    explicit Vc_INTRINSIC SimdArray(
        Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
        : data(Vc::IndexesFromZero)
    {
        data += value_type(Offset);
    }

    Vc_INTRINSIC void setZero() { data.setZero(); }
    Vc_INTRINSIC void setZero(mask_type k) { data.setZero(internal_data(k)); }
    Vc_INTRINSIC void setZeroInverted() { data.setZeroInverted(); }
    Vc_INTRINSIC void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); }

    Vc_INTRINSIC void setQnan() { data.setQnan(); }
    Vc_INTRINSIC void setQnan(mask_type m) { data.setQnan(internal_data(m)); }

    // internal: execute specified Operation, writing the result into a fresh object
    template <typename Op, typename... Args>
    static Vc_INTRINSIC fixed_size_simd<T, N> fromOperation(Op op, Args &&... args)
    {
        fixed_size_simd<T, N> r;
        Common::unpackArgumentsAuto(op, r.data, std::forward<Args>(args)...);
        return r;
    }

    // internal: execute specified Operation that produces no result
    template <typename Op, typename... Args>
    static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
    {
        Common::unpackArgumentsAuto(op, nullptr, std::forward<Args>(args)...);
    }

    static Vc_INTRINSIC fixed_size_simd<T, N> Zero()
    {
        return SimdArray(Vc::Zero);
    }
    static Vc_INTRINSIC fixed_size_simd<T, N> One()
    {
        return SimdArray(Vc::One);
    }
    static Vc_INTRINSIC fixed_size_simd<T, N> IndexesFromZero()
    {
        return SimdArray(Vc::IndexesFromZero);
    }
    static Vc_INTRINSIC fixed_size_simd<T, N> Random()
    {
        return fromOperation(Common::Operations::random());
    }

    // load ctor; Flags selects alignment/streaming behavior
    template <class U, class Flags = DefaultLoadTag,
              class = enable_if<std::is_arithmetic<U>::value &&
                                Traits::is_load_store_flag<Flags>::value>>
    explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = {}) : data(mem, f)
    {
    }

    template <typename... Args> Vc_INTRINSIC void load(Args &&... args)
    {
        data.load(std::forward<Args>(args)...);
    }

    template <typename... Args> Vc_INTRINSIC void store(Args &&... args) const
    {
        data.store(std::forward<Args>(args)...);
    }

    Vc_INTRINSIC mask_type operator!() const
    {
        return {private_init, !data};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> operator-() const
    {
        return {private_init, -data};
    }

    // unary + returns a copy
    Vc_INTRINSIC fixed_size_simd<T, N> operator+() const { return *this; }

    Vc_INTRINSIC fixed_size_simd<T, N> operator~() const
    {
        return {private_init, ~data};
    }

    // shift operators; enabled only for integral T and integral shift counts
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator<<(U x) const
    {
        return {private_init, data << x};
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC fixed_size_simd<T, N> &operator<<=(U x)
    {
        data <<= x;
        return *this;
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator>>(U x) const
    {
        return {private_init, data >> x};
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC fixed_size_simd<T, N> &operator>>=(U x)
    {
        data >>= x;
        return *this;
    }

// compound-assignment operators, forwarding to the wrapped vector
#define Vc_BINARY_OPERATOR_(op)                                                          \
    Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs)             \
    {                                                                                    \
        data op## = rhs.data;                                                            \
        return *this;                                                                    \
    }
    Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
    Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
    Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
#undef Vc_BINARY_OPERATOR_

    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
    {
        return {private_init, isnegative(data)};
    }

private:
    friend reference;
    // scalar element read used by the reference proxy and const operator[]
    Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
    {
        return o.data[i];
    }
    // scalar element write used by the reference proxy
    template <typename U>
    Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
        noexcept(std::declval<value_type &>() = v))
    {
        o.data[i] = v;
    }

public:
    // element access; the returned proxy writes back through set()
    Vc_INTRINSIC reference operator[](size_t i) noexcept
    {
        static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
        return {*this, int(i)};
    }
    Vc_INTRINSIC value_type operator[](size_t i) const noexcept
    {
        return get(*this, int(i));
    }

    // write-masked view: ops through the returned object only touch lanes where k is set
    Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(const mask_type &k)
    {
        return {*this, k};
    }

    Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k)
    {
        data.assign(v.data, internal_data(k));
    }

    // reductions ////////////////////////////////////////////////////////
#define Vc_REDUCTION_FUNCTION_(name_)                                                    \
    Vc_INTRINSIC Vc_PURE value_type name_() const { return data.name_(); }               \
    Vc_INTRINSIC Vc_PURE value_type name_(mask_type mask) const                          \
    {                                                                                    \
        return data.name_(internal_data(mask));                                          \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
    Vc_REDUCTION_FUNCTION_(min);
    Vc_REDUCTION_FUNCTION_(max);
    Vc_REDUCTION_FUNCTION_(product);
    Vc_REDUCTION_FUNCTION_(sum);
#undef Vc_REDUCTION_FUNCTION_
    Vc_INTRINSIC Vc_PURE fixed_size_simd<T, N> partialSum() const
    {
        return {private_init, data.partialSum()};
    }

    template <typename F> Vc_INTRINSIC fixed_size_simd<T, N> apply(F &&f) const
    {
        return {private_init, data.apply(std::forward<F>(f))};
    }
    template <typename F> Vc_INTRINSIC fixed_size_simd<T, N> apply(F &&f, const mask_type &k) const
    {
        return {private_init, data.apply(std::forward<F>(f), k)};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> shifted(int amount) const
    {
        return {private_init, data.shifted(amount)};
    }

    // shift with carry-in from a second array
    template <std::size_t NN>
    Vc_INTRINSIC fixed_size_simd<T, N> shifted(int amount, const SimdArray<value_type, NN> &shiftIn)
        const
    {
        return {private_init, data.shifted(amount, simd_cast<VectorType>(shiftIn))};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> rotated(int amount) const
    {
        return {private_init, data.rotated(amount)};
    }

    Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC fixed_size_simd<T, N> exponent() const
    {
        return {private_init, exponent(data)};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> interleaveLow(SimdArray x) const
    {
        return {private_init, data.interleaveLow(x.data)};
    }
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveHigh(SimdArray x) const
    {
        return {private_init, data.interleaveHigh(x.data)};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> reversed() const
    {
        return {private_init, data.reversed()};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> sorted() const
    {
        return {private_init, data.sorted()};
    }

    // generator ctor: G is invoked with each index in [0, N); excluded for SIMD
    // vector arguments so it does not shadow the conversion constructors
    template <class G, class = decltype(std::declval<G>()(std::size_t())),
              class = enable_if<!Traits::is_simd_vector<G>::value>>
    Vc_INTRINSIC SimdArray(const G &gen) : data(gen)
    {
    }
    template <typename G> static Vc_INTRINSIC fixed_size_simd<T, N> generate(const G &gen)
    {
        return {private_init, VectorType::generate(gen)};
    }

    Vc_DEPRECATED("use copysign(x, y) instead")
    Vc_INTRINSIC fixed_size_simd<T, N> copySign(const SimdArray &x) const
    {
        return {private_init, Vc::copysign(data, x.data)};
    }

    friend VectorType &internal_data<>(SimdArray &x);
    friend const VectorType &internal_data<>(const SimdArray &x);

    // internal constructor wrapping an existing native vector
    Vc_INTRINSIC SimdArray(private_init_t, VectorType &&x) : data(std::move(x)) {}

    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type));

private:
    // The alignas attribute attached to the class declaration above is ignored by ICC
    // 17.0.0 (at least). So just move the alignas attribute down here where it works for
    // all compilers.
    alignas(static_cast<std::size_t>(
        Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(VectorType_) /
                                 VectorType_::size()>::value)) storage_type data;
};
513 template <typename T, std::size_t N, typename VectorType> constexpr std::size_t SimdArray<T, N, VectorType, N>::Size;
514 template <typename T, std::size_t N, typename VectorType>
// Grants free functions access to the wrapped native vector of an atomic
// SimdArray (befriended inside the class above).
template <typename T, std::size_t N, typename VectorType>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
VectorType &internal_data(SimdArray<T, N, VectorType, N> &x)
{
    return x.data;
}
// Const overload of internal_data for read-only access to the wrapped vector.
template <typename T, std::size_t N, typename VectorType>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const VectorType &internal_data(const SimdArray<T, N, VectorType, N> &x)
{
    return x.data;
}
532 
// unwrap {{{2
// Identity overload: anything that is not handled by the overloads below is
// passed through unchanged.
template <class T> Vc_INTRINSIC T unwrap(const T &x) { return x; }
535 
// Unwraps an atomic SimdArray to its single underlying native vector.
template <class T, size_t N, class V>
Vc_INTRINSIC V unwrap(const SimdArray<T, N, V, N> &x)
{
    return internal_data(x);
}
541 
// Unwraps a Segment by first converting it to its fixed-size view and then
// unwrapping the result via the overloads above.
template <class T, size_t Pieces, size_t Index>
Vc_INTRINSIC auto unwrap(const Common::Segment<T, Pieces, Index> &x)
    -> decltype(x.to_fixed_size())
{
    return unwrap(x.to_fixed_size());
}
548 
// gatherImplementation {{{2
// Unmasked gather for the atomic specialization: unwraps the index vector and
// forwards to the native vector's gather.
template <typename T, std::size_t N, typename VectorType>
template <class MT, class IT, int Scale>
Vc_INTRINSIC void SimdArray<T, N, VectorType, N>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args)
{
    data.gather(Common::make_gather<Scale>(args.address, unwrap(args.indexes)));
}
// Masked gather: only lanes selected by mask are loaded.
template <typename T, std::size_t N, typename VectorType>
template <class MT, class IT, int Scale>
Vc_INTRINSIC void SimdArray<T, N, VectorType, N>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args, MaskArgument mask)
{
    data.gather(Common::make_gather<Scale>(args.address, unwrap(args.indexes)),
                mask);
}
565 
// scatterImplementation {{{2
// Unmasked scatter: writes each element to mem at its corresponding index.
template <typename T, std::size_t N, typename VectorType>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
                                                                  IT &&indexes) const
{
    data.scatter(mem, unwrap(std::forward<IT>(indexes)));
}
// Masked scatter: only lanes selected by mask are stored.
template <typename T, std::size_t N, typename VectorType>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
                                                                  IT &&indexes,
                                                                  MaskArgument mask) const
{
    data.scatter(mem, unwrap(std::forward<IT>(indexes)), mask);
}
582 
// generic SimdArray {{{1
// Generic SimdArray: used whenever N exceeds the width of a single native
// vector. The storage is split into two halves data0/data1 of sizes N0/N1
// (computed by SimdArrayTraits<T, N>); operations recurse into the halves.
template <typename T, size_t N, typename V, size_t Wt> class SimdArray
{
    static_assert(std::is_same<T, double>::value ||
                      std::is_same<T, float>::value ||
                      std::is_same<T, int32_t>::value ||
                      std::is_same<T, uint32_t>::value ||
                      std::is_same<T, int16_t>::value ||
                      std::is_same<T, uint16_t>::value, "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }");
    static_assert(
        std::is_same<V, typename Common::select_best_vector_type<T, N>::type>::value &&
            V::size() == Wt,
        "ERROR: leave the third and fourth template parameters with their defaults. They "
        "are implementation details.");
    static_assert(
        // either the EntryType and VectorEntryType of the main V are equal
        std::is_same<typename V::EntryType, typename V::VectorEntryType>::value ||
            // or N is a multiple of V::size()
            (N % V::size() == 0),
        "SimdArray<(un)signed short, N> on MIC only works correctly for N = k * "
        "MIC::(u)short_v::size(), i.e. k * 16.");

    using my_traits = SimdArrayTraits<T, N>;
    static constexpr std::size_t N0 = my_traits::N0;
    static constexpr std::size_t N1 = my_traits::N1;
    // Split knows how to cut masks/vectors/argument packs at index N0
    using Split = Common::Split<N0>;
    // alias used to declare reference-to-C-array parameters below
    template <typename U, std::size_t K> using CArray = U[K];
643 public:
644  static constexpr bool is_atomic = false;
645  using storage_type0 = typename my_traits::storage_type0;
646  using storage_type1 = typename my_traits::storage_type1;
647  static_assert(storage_type0::size() == N0, "");
648 
652  using vector_type = V;
653  using vectorentry_type = typename storage_type0::vectorentry_type;
654  typedef vectorentry_type alias_type Vc_MAY_ALIAS;
655 
657  using value_type = T;
658 
661 
664 
675  static constexpr std::size_t size() { return N; }
676 
678  using Mask = mask_type;
680  using MaskType = Mask;
681  using MaskArgument = const MaskType &;
682  using VectorEntryType = vectorentry_type;
687  using AsArg = const SimdArray &;
688 
689  using reference = Detail::ElementReference<SimdArray>;
690 
692  static constexpr std::size_t MemoryAlignment =
696 
699 
701  static Vc_INTRINSIC fixed_size_simd<T, N> Zero()
702  {
703  return SimdArray(Vc::Zero);
704  }
705 
707  static Vc_INTRINSIC fixed_size_simd<T, N> One()
708  {
709  return SimdArray(Vc::One);
710  }
711 
714  {
716  }
717 
719  static Vc_INTRINSIC fixed_size_simd<T, N> Random()
720  {
721  return fromOperation(Common::Operations::random());
722  }
723 
    // generator ctor: gen(i) is invoked for each index i in [0, N); the second
    // half offsets the index by storage_type0::size()
    template <class G, class = decltype(std::declval<G>()(std::size_t())),
              class = enable_if<!Traits::is_simd_vector<G>::value>>
    Vc_INTRINSIC SimdArray(const G &gen)
        : data0(gen), data1([&](std::size_t i) { return gen(i + storage_type0::size()); })
    {
    }

    // static generate: same semantics as the generator ctor
    template <typename G> static Vc_INTRINSIC fixed_size_simd<T, N> generate(const G &gen) // {{{2
    {
        auto tmp = storage_type0::generate(gen);  // GCC bug: the order of evaluation in
                                                  // an initializer list is well-defined
                                                  // (front to back), but GCC 4.8 doesn't
                                                  // implement this correctly. Therefore
                                                  // we enforce correct order.
        return {std::move(tmp),
                storage_type1::generate([&](std::size_t i) { return gen(i + N0); })};
    }
    // zero-initializing default constructor
    SimdArray() = default;

    // broadcast a into every element of both halves
    Vc_INTRINSIC SimdArray(value_type a) : data0(a), data1(a) {}
    // broadcast from `int` when value_type is not int
    template <
        typename U,
        typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
    SimdArray(U a)
        : SimdArray(static_cast<value_type>(a))
    {
    }

    // default copy ctor/operator
    SimdArray(const SimdArray &) = default;
    SimdArray(SimdArray &&) = default;
    SimdArray &operator=(const SimdArray &) = default;
769 
    // load ctor: the first half reads from mem, the second continues at
    // mem + storage_type0::size(); Flags selects alignment/streaming behavior
    template <typename U, typename Flags = DefaultLoadTag,
              typename = enable_if<std::is_arithmetic<U>::value &&
                                   Traits::is_load_store_flag<Flags>::value>>
    explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = {})
        : data0(mem, f), data1(mem + storage_type0::size(), f)
    {
    }

// MSVC does overload resolution differently and takes the const U *mem overload (I hope)
#ifndef Vc_MSVC
    // load ctor from a C-array reference; keeps the array extent so the
    // argument does not decay to a pointer during overload resolution
    template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
              typename = enable_if<std::is_arithmetic<U>::value &&
                                   Traits::is_load_store_flag<Flags>::value>>
    explicit Vc_INTRINSIC SimdArray(CArray<U, Extent> &mem, Flags f = {})
        : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
    {
    }
    // same, for const C-arrays
    template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
              typename = enable_if<std::is_arithmetic<U>::value &&
                                   Traits::is_load_store_flag<Flags>::value>>
    explicit Vc_INTRINSIC SimdArray(const CArray<U, Extent> &mem, Flags f = {})
        : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
    {
    }
#endif
805 
    // initializer list: unaligned load from the list's contiguous buffer,
    // split across the two halves
    Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
        : data0(init.begin(), Vc::Unaligned)
        , data1(init.begin() + storage_type0::size(), Vc::Unaligned)
    {
        Vc_ASSERT(init.size() == size());
    }

// gather/scatter member functions are textually shared with Vector via these headers
#include "gatherinterface.h"
#include "scatterinterface.h"
816 
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data0(), data1() {}
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data0(o), data1(o) {}
    // IndexesFromZero: the second half continues counting at storage_type0::size()
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i)
        : data0(i)
        , data1(Common::AddOffset<VectorSpecialInitializerIndexesFromZero,
                                  storage_type0::size()>())
    {
    }
    // IndexesFromZero with an additional compile-time offset
    template <size_t Offset>
    explicit Vc_INTRINSIC SimdArray(
        Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset> i)
        : data0(i)
        , data1(Common::AddOffset<VectorSpecialInitializerIndexesFromZero,
                                  storage_type0::size() + Offset>())
    {
    }
833 
    // explicit casts: any equally-sized SIMD vector type that is not already
    // covered by the implicit-cast overload below
    template <class W, class = enable_if<
                           (Traits::is_simd_vector<W>::value &&
                            Traits::simd_vector_size<W>::value == N &&
                            !(std::is_convertible<Traits::entry_type_of<W>, T>::value &&
                              Traits::isSimdArray<W>::value))>>
    Vc_INTRINSIC explicit SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
    {
    }

    // implicit casts: equally-sized SimdArrays with a convertible entry type
    template <class W, class = enable_if<
                           (Traits::isSimdArray<W>::value &&
                            Traits::simd_vector_size<W>::value == N &&
                            std::is_convertible<Traits::entry_type_of<W>, T>::value)>,
              class = W>
    Vc_INTRINSIC SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
    {
    }

    // construct from a Segment by splitting each half into a finer segment
    template <class W, std::size_t Pieces, std::size_t Index>
    Vc_INTRINSIC SimdArray(Common::Segment<W, Pieces, Index> &&x)
        : data0(Common::Segment<W, 2 * Pieces, 2 * Index>{x.data})
        , data1(Common::Segment<W, 2 * Pieces, 2 * Index + 1>{x.data})
    {
    }
860 
    // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and
    // T implicitly convertible to U
    template <typename U, typename A,
              typename =
                  enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
                            !std::is_same<A, simd_abi::fixed_size<N>>::value>>
    operator Vector<U, A>() const
    {
        auto r = simd_cast<Vector<U, A>>(data0, data1);
        return r;
    }
    // reinterpretation as the fixed_size_simd subclass; relies on
    // fixed_size_simd<T, N> deriving from this class without adding state
    Vc_INTRINSIC operator fixed_size_simd<T, N> &()
    {
        return static_cast<fixed_size_simd<T, N> &>(*this);
    }
    Vc_INTRINSIC operator const fixed_size_simd<T, N> &() const
    {
        return static_cast<const fixed_size_simd<T, N> &>(*this);
    }
880 
    // element-wise modifiers; masked variants split the mask across the halves

    Vc_INTRINSIC void setZero()
    {
        data0.setZero();
        data1.setZero();
    }
    Vc_INTRINSIC void setZero(const mask_type &k)
    {
        data0.setZero(Split::lo(k));
        data1.setZero(Split::hi(k));
    }
    Vc_INTRINSIC void setZeroInverted()
    {
        data0.setZeroInverted();
        data1.setZeroInverted();
    }
    Vc_INTRINSIC void setZeroInverted(const mask_type &k)
    {
        data0.setZeroInverted(Split::lo(k));
        data1.setZeroInverted(Split::hi(k));
    }


    Vc_INTRINSIC void setQnan() {
        data0.setQnan();
        data1.setQnan();
    }
    Vc_INTRINSIC void setQnan(const mask_type &m) {
        data0.setQnan(Split::lo(m));
        data1.setQnan(Split::hi(m));
    }
913 
    // internal: execute specified Operation on both halves, splitting each
    // argument with Split::lo/hi
    template <typename Op, typename... Args>
    static Vc_INTRINSIC fixed_size_simd<T, N> fromOperation(Op op, Args &&... args)
    {
        fixed_size_simd<T, N> r = {
            storage_type0::fromOperation(op, Split::lo(args)...),  // no forward here - it
                                                                   // could move and thus
                                                                   // break the next line
            storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
        return r;
    }

    // internal: execute specified Operation that produces no result
    template <typename Op, typename... Args>
    static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
    {
        storage_type0::callOperation(op, Split::lo(args)...);
        storage_type1::callOperation(op, Split::hi(std::forward<Args>(args))...);
    }
933 
    // load: the second half continues at mem + storage_type0::size()
    template <typename U, typename... Args> Vc_INTRINSIC void load(const U *mem, Args &&... args)
    {
        data0.load(mem, Split::lo(args)...);  // no forward here - it could move and thus
                                              // break the next line
        data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
    }

    // store: mirror of load
    template <typename U, typename... Args> Vc_INTRINSIC void store(U *mem, Args &&... args) const
    {
        data0.store(mem, Split::lo(args)...);  // no forward here - it could move and thus
                                               // break the next line
        data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
    }
948 
    Vc_INTRINSIC mask_type operator!() const
    {
        return {!data0, !data1};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> operator-() const
    {
        return {-data0, -data1};
    }

    // unary + returns a copy
    Vc_INTRINSIC fixed_size_simd<T, N> operator+() const { return *this; }

    Vc_INTRINSIC fixed_size_simd<T, N> operator~() const
    {
        return {~data0, ~data1};
    }
966 
    // left/right shift operators {{{2
    // enabled only for integral T and integral shift counts; applied per half
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator<<(U x) const
    {
        return {data0 << x, data1 << x};
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC fixed_size_simd<T, N> &operator<<=(U x)
    {
        data0 <<= x;
        data1 <<= x;
        return *this;
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator>>(U x) const
    {
        return {data0 >> x, data1 >> x};
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC fixed_size_simd<T, N> &operator>>=(U x)
    {
        data0 >>= x;
        data1 >>= x;
        return *this;
    }
996 
    // binary operators {{{2
    // compound assignment, applied independently to both halves
#define Vc_BINARY_OPERATOR_(op)                                                          \
    Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs)             \
    {                                                                                    \
        data0 op## = rhs.data0;                                                          \
        data1 op## = rhs.data1;                                                          \
        return *this;                                                                    \
    }
    Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
    Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
    Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
#undef Vc_BINARY_OPERATOR_
1009 
    // operator[] {{{2
private:
    friend reference;
    // reads element i by viewing the object as a flat array of the aliasing
    // entry type, so both halves appear as one contiguous array
    Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
    {
        return reinterpret_cast<const alias_type *>(&o)[i];
    }
    // element write used by the reference proxy; same aliasing view
    template <typename U>
    Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
        noexcept(std::declval<value_type &>() = v))
    {
        reinterpret_cast<alias_type *>(&o)[i] = v;
    }
1026 
1027 public:
1029 
1035  Vc_INTRINSIC reference operator[](size_t i) noexcept
1036  {
1037  static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
1038  return {*this, int(i)};
1039  }
1040 
1042  Vc_INTRINSIC value_type operator[](size_t index) const noexcept
1043  {
1044  return get(*this, int(index));
1045  }
1047 
    // operator(){{{2
    // Returns a write-masked view of *this: assignments through the returned object
    // only modify elements where \p mask is true.
    Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
        const mask_type &mask)
    {
        return {*this, mask};
    }

    // Masked assignment: copies elements of \p v into *this where \p k is true,
    // with the mask split into its low/high halves to match data0/data1.
    Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k) //{{{2
    {
        data0.assign(v.data0, internal_data0(k));
        data1.assign(v.data1, internal_data1(k));
    }
1062 
    // reductions {{{2
    // Generates min()/max()/product()/sum() and their masked overloads.
    // If both storage halves have equal width, the halves are first combined
    // element-wise with binary_fun_ and the result reduced once; otherwise each half
    // is reduced on its own and the two scalars combined with scalar_fun_. The
    // masked overload skips a half whose mask part is entirely false and reduces
    // only the other half.
#define Vc_REDUCTION_FUNCTION_(name_, binary_fun_, scalar_fun_)                          \
private:                                                                                 \
    template <typename ForSfinae = void>                                                 \
    Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value &&                       \
                               storage_type0::Size == storage_type1::Size,               \
                           value_type> name_##_impl() const                              \
    {                                                                                    \
        return binary_fun_(data0, data1).name_();                                        \
    }                                                                                    \
                                                                                         \
    template <typename ForSfinae = void>                                                 \
    Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value &&                       \
                               storage_type0::Size != storage_type1::Size,               \
                           value_type> name_##_impl() const                              \
    {                                                                                    \
        return scalar_fun_(data0.name_(), data1.name_());                                \
    }                                                                                    \
                                                                                         \
public:                                                                                  \
                                                                                         \
    Vc_INTRINSIC value_type name_() const { return name_##_impl(); }                     \
                                                                                         \
    Vc_INTRINSIC value_type name_(const mask_type &mask) const                           \
    {                                                                                    \
        if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) {                                 \
            return data1.name_(Split::hi(mask));                                         \
        } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) {                          \
            return data0.name_(Split::lo(mask));                                         \
        } else {                                                                         \
            return scalar_fun_(data0.name_(Split::lo(mask)),                             \
                               data1.name_(Split::hi(mask)));                            \
        }                                                                                \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
    Vc_REDUCTION_FUNCTION_(min, Vc::min, std::min);
    Vc_REDUCTION_FUNCTION_(max, Vc::max, std::max);
    Vc_REDUCTION_FUNCTION_(product, internal::product_helper_, internal::product_helper_);
    Vc_REDUCTION_FUNCTION_(sum, internal::sum_helper_, internal::sum_helper_);
#undef Vc_REDUCTION_FUNCTION_
    // Prefix sum: element i of the result is the sum of input elements [0, i].
    // The carry out of the first half (its last prefix-sum entry) is added to the
    // first element of the second half before that half's prefix sums are computed.
    Vc_INTRINSIC Vc_PURE fixed_size_simd<T, N> partialSum() const //{{{2
    {
        auto ps0 = data0.partialSum();
        auto tmp = data1;
        tmp[0] += ps0[data0.size() - 1];
        return {std::move(ps0), tmp.partialSum()};
    }
1111 
    // apply {{{2
    // Applies \p f element-wise (delegated to each storage half) and returns the
    // results as a new vector.
    template <typename F> inline fixed_size_simd<T, N> apply(F &&f) const
    {
        return {data0.apply(f), data1.apply(f)};
    }
    // Masked variant: \p f is applied only where \p k is true; the mask is split to
    // match the two storage halves.
    template <typename F>
    inline fixed_size_simd<T, N> apply(F &&f, const mask_type &k) const
    {
        return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))};
    }
1124 
    // shifted {{{2
    // Returns a copy where element i holds the original element i + amount, and
    // elements shifted in from outside the vector are zero. The work is distributed
    // over the two storage halves depending on how far the shift reaches.
    inline fixed_size_simd<T, N> shifted(int amount) const
    {
        constexpr int SSize = Size;
        constexpr int SSize0 = storage_type0::Size;
        constexpr int SSize1 = storage_type1::Size;
        if (amount == 0) {
            return *this;
        }
        if (amount < 0) {
            if (amount > -SSize0) {
                // shift within the halves: data0 shifts in zeros, data1 shifts in
                // elements carried over from data0
                return {data0.shifted(amount), data1.shifted(amount, data0)};
            }
            if (amount == -SSize0) {
                // data0 lands exactly in the data1 slot
                return {storage_type0(0), simd_cast<storage_type1>(data0)};
            }
            if (amount < -SSize0) {
                // only a tail of data0 survives, shifted into the data1 slot
                return {storage_type0(0), simd_cast<storage_type1>(data0.shifted(
                                              amount + SSize0))};
            }
            // unreachable (the three cases above are exhaustive for amount < 0);
            // kept to silence missing-return warnings
            return Zero();
        } else {
            if (amount >= SSize) {
                return Zero();
            } else if (amount >= SSize0) {
                // only elements of data1 remain, moved into the data0 slot
                return {
                    simd_cast<storage_type0>(data1).shifted(amount - SSize0),
                    storage_type1(0)};
            } else if (amount >= SSize1) {
                // data1 is emptied; data0 pulls in all of data1
                return {data0.shifted(amount, data1), storage_type1(0)};
            } else {
                // small shift: both halves keep elements, data0 pulls from data1
                return {data0.shifted(amount, data1), data1.shifted(amount)};
            }
        }
    }
1161 
1162  template <std::size_t NN>
1163  inline enable_if<
1164  !(std::is_same<storage_type0, storage_type1>::value && // not bisectable
1165  N == NN),
1167  shifted(int amount, const SimdArray<value_type, NN> &shiftIn) const
1168  {
1169  constexpr int SSize = Size;
1170  if (amount < 0) {
1171  return fixed_size_simd<T, N>([&](int i) -> value_type {
1172  i += amount;
1173  if (i >= 0) {
1174  return operator[](i);
1175  } else if (i >= -SSize) {
1176  return shiftIn[i + SSize];
1177  }
1178  return 0;
1179  });
1180  }
1181  return fixed_size_simd<T, N>([&](int i) -> value_type {
1182  i += amount;
1183  if (i < SSize) {
1184  return operator[](i);
1185  } else if (i < 2 * SSize) {
1186  return shiftIn[i - SSize];
1187  }
1188  return 0;
1189  });
1190  }
1191 
private:
    // workaround for MSVC not understanding the simpler and shorter expression of the boolean
    // expression directly in the enable_if below
    template <std::size_t NN> struct bisectable_shift
        : public std::integral_constant<bool,
                                        std::is_same<storage_type0, storage_type1>::value &&  // bisectable
                                        N == NN>
    {
    };

public:
    // Shift with carry-in for the bisectable case (both halves have the same type):
    // the shift can be composed from half-wise shifts without a scalar fallback.
    // Branches are ordered from small negative shifts to large positive ones; the
    // carry flows between data0, data1 and the two halves of \p shiftIn.
    template <std::size_t NN>
    inline fixed_size_simd<T, N> shifted(
        enable_if<bisectable_shift<NN>::value, int> amount,
        const SimdArray<value_type, NN> &shiftIn) const
    {
        constexpr int SSize = Size;
        if (amount < 0) {
            if (amount > -static_cast<int>(storage_type0::Size)) {
                return {data0.shifted(amount, internal_data1(shiftIn)),
                        data1.shifted(amount, data0)};
            }
            if (amount == -static_cast<int>(storage_type0::Size)) {
                // exact one-half shift: halves move over wholesale
                return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
            }
            if (amount > -SSize) {
                return {
                    internal_data1(shiftIn)
                        .shifted(amount + static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
                    data0.shifted(amount + static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
            }
            if (amount == -SSize) {
                // full-vector shift: result is exactly the carry-in vector
                return shiftIn;
            }
            if (amount > -2 * SSize) {
                return shiftIn.shifted(amount + SSize);
            }
        }
        if (amount == 0) {
            return *this;
        }
        if (amount < static_cast<int>(storage_type0::Size)) {
            return {data0.shifted(amount, data1),
                    data1.shifted(amount, internal_data0(shiftIn))};
        }
        if (amount == static_cast<int>(storage_type0::Size)) {
            // exact one-half shift in the positive direction
            return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
        }
        if (amount < SSize) {
            return {data1.shifted(amount - static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
                    internal_data0(shiftIn)
                        .shifted(amount - static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
        }
        if (amount == SSize) {
            return shiftIn;
        }
        if (amount < 2 * SSize) {
            return shiftIn.shifted(amount - SSize);
        }
        // shift distance exceeds both vectors: everything shifted out
        return Zero();
    }
1253 
1254  // rotated {{{2
1256  Vc_INTRINSIC fixed_size_simd<T, N> rotated(int amount) const
1257  {
1258  amount %= int(size());
1259  if (amount == 0) {
1260  return *this;
1261  } else if (amount < 0) {
1262  amount += size();
1263  }
1264 
1265 #ifdef Vc_MSVC
1266  // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use store
1267  // ->
1268  // load to implement the function instead.
1269  alignas(MemoryAlignment) T tmp[N + data0.size()];
1270  data0.store(&tmp[0], Vc::Aligned);
1271  data1.store(&tmp[data0.size()], Vc::Aligned);
1272  data0.store(&tmp[N], Vc::Unaligned);
1274  r.data0.load(&tmp[amount], Vc::Unaligned);
1275  r.data1.load(&tmp[(amount + data0.size()) % size()], Vc::Unaligned);
1276  return r;
1277 #else
1278  auto &&d0cvtd = simd_cast<storage_type1>(data0);
1279  auto &&d1cvtd = simd_cast<storage_type0>(data1);
1280  constexpr int size0 = storage_type0::size();
1281  constexpr int size1 = storage_type1::size();
1282 
1283  if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
1284  return {std::move(d1cvtd), std::move(d0cvtd)};
1285  } else if (amount < size1) {
1286  return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
1287  } else if (amount == size1) {
1288  return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
1289  } else if (int(size()) - amount < size1) {
1290  return {data0.shifted(amount - int(size()), d1cvtd.shifted(size1 - size0)),
1291  data1.shifted(amount - int(size()), data0.shifted(size0 - size1))};
1292  } else if (int(size()) - amount == size1) {
1293  return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
1294  simd_cast<storage_type1>(data0.shifted(size0 - size1))};
1295  } else if (amount <= size0) {
1296  return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1297  simd_cast<storage_type1>(data0.shifted(amount - size1))};
1298  } else {
1299  return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1300  simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
1301  }
1302  return *this;
1303 #endif
1304  }
1305 
    // interleaveLow/-High {{{2
    // Interleaves the low halves of *this and \p x:
    // result = {data0[0], x.data0[0], data0[1], x.data0[1], ...}
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveLow(const SimdArray &x) const
    {
        // return data0[0], x.data0[0], data0[1], x.data0[1], ...
        return {data0.interleaveLow(x.data0),
                simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
    }
    // Interleaves the high halves; dispatches on whether the two storage halves have
    // equal width (true_type) or not (false_type).
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveHigh(const SimdArray &x) const
    {
        return interleaveHighImpl(
            x,
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }

private:
    // Equal-width halves: the high elements all come from data1.
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveHighImpl(const SimdArray &x, std::true_type) const
    {
        return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
    }
    // Unequal-width halves: part of the interleaved data0 high elements must be
    // stitched together with the interleaved data1 low elements via shifted().
    inline fixed_size_simd<T, N> interleaveHighImpl(const SimdArray &x, std::false_type) const
    {
        return {data0.interleaveHigh(x.data0)
                    .shifted(storage_type1::Size,
                             simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
                data1.interleaveHigh(x.data1)};
    }
1336 
public:
    // Returns a copy with the element order reversed.
    inline fixed_size_simd<T, N> reversed() const //{{{2
    {
        if (std::is_same<storage_type0, storage_type1>::value) {
            // bisectable: reverse each half and swap them
            return {simd_cast<storage_type0>(data1).reversed(),
                    simd_cast<storage_type1>(data0).reversed()};
        } else {
#ifdef Vc_MSVC
            // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use
            // store
            // -> load to implement the function instead.
            alignas(MemoryAlignment) T tmp[N];
            data1.reversed().store(&tmp[0], Vc::Aligned);
            data0.reversed().store(&tmp[data1.size()], Vc::Unaligned);
            return fixed_size_simd<T, N>{&tmp[0], Vc::Aligned};
#else
            // unequal halves: shift data1 into data0 before reversing, and take the
            // remaining reversed data0 elements for the (narrower) data1 slot
            return {data0.shifted(storage_type1::Size, data1).reversed(),
                    simd_cast<storage_type1>(data0.reversed().shifted(
                        storage_type0::Size - storage_type1::Size))};
#endif
        }
    }
    // Returns a copy with the elements sorted ascending; dispatches on whether the
    // two storage halves have equal width.
    inline fixed_size_simd<T, N> sorted() const //{{{2
    {
        return sortedImpl(
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }
1366 
    // Equal-width halves: a bitonic-style merge — sort both halves (one reversed),
    // take element-wise min/max to split into a low and a high part, then sort each
    // part again.
    Vc_INTRINSIC fixed_size_simd<T, N> sortedImpl(std::true_type) const
    {
#ifdef Vc_DEBUG_SORTED
        std::cerr << "-- " << data0 << data1 << '\n';
#endif
        const auto a = data0.sorted();
        const auto b = data1.sorted().reversed();
        const auto lo = Vc::min(a, b);
        const auto hi = Vc::max(a, b);
        return {lo.sorted(), hi.sorted()};
    }

    // Unequal-width halves: pad up to the next power of two with the largest
    // representable value (or +inf) so the padding sorts to the end, sort the padded
    // vector, and truncate back to N elements.
    Vc_INTRINSIC fixed_size_simd<T, N> sortedImpl(std::false_type) const
    {
        using SortableArray =
            fixed_size_simd<value_type, Common::NextPowerOfTwo<size()>::value>;
        auto sortable = simd_cast<SortableArray>(*this);
        for (std::size_t i = Size; i < SortableArray::Size; ++i) {
            using limits = std::numeric_limits<value_type>;
            if (limits::has_infinity) {
                sortable[i] = limits::infinity();
            } else {
                sortable[i] = std::numeric_limits<value_type>::max();
            }
        }
        return simd_cast<fixed_size_simd<T, N>>(sortable.sorted());

        /* The following implementation appears to be less efficient. But this may need further
         * work.
        const auto a = data0.sorted();
        const auto b = data1.sorted();
#ifdef Vc_DEBUG_SORTED
        std::cerr << "== " << a << b << '\n';
#endif
        auto aIt = Vc::begin(a);
        auto bIt = Vc::begin(b);
        const auto aEnd = Vc::end(a);
        const auto bEnd = Vc::end(b);
        return SimdArray::generate([&](std::size_t) {
            if (aIt == aEnd) {
                return *(bIt++);
            }
            if (bIt == bEnd) {
                return *(aIt++);
            }
            if (*aIt < *bIt) {
                return *(aIt++);
            } else {
                return *(bIt++);
            }
        });
        */
    }
1422 
1425 
    // Number of elements; mirrors size() as a static data member (defined
    // out-of-class below).
    static constexpr std::size_t Size = size();

    // Deprecated member forms of the free math functions; they forward per half.
    Vc_DEPRECATED("use exponent(x) instead")
    Vc_INTRINSIC fixed_size_simd<T, N> exponent() const
    {
        return {exponent(data0), exponent(data1)};
    }

    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
    {
        return {isnegative(data0), isnegative(data1)};
    }

    Vc_DEPRECATED("use copysign(x, y) instead")
    Vc_INTRINSIC fixed_size_simd<T, N> copySign(const SimdArray &x) const
    {
        return {Vc::copysign(data0, x.data0),
                Vc::copysign(data1, x.data1)};
    }

    // internal_data0/1 {{{2
    // Grant the free accessor functions (defined below) access to data0/data1.
    friend storage_type0 &internal_data0<>(SimdArray &x);
    friend storage_type1 &internal_data1<>(SimdArray &x);
    friend const storage_type0 &internal_data0<>(const SimdArray &x);
    friend const storage_type1 &internal_data1<>(const SimdArray &x);

    // Internal constructor from the two storage halves (moves both in).
    Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y) //{{{2
        : data0(std::move(x)), data1(std::move(y))
    {
    }

    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type0));
1465 
private: //{{{2
    // The alignas attribute attached to the class declaration above is ignored by ICC
    // 17.0.0 (at least). So just move the alignas attribute down here where it works for
    // all compilers.
    // data0 holds the first storage_type0::Size elements, data1 the remaining ones.
    alignas(static_cast<std::size_t>(
        Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(V) /
                                 V::size()>::value)) storage_type0 data0;
    storage_type1 data1;
};
#undef Vc_CURRENT_CLASS_NAME
// Out-of-class definitions of the static constexpr members (needed for ODR-use
// before C++17 inline variables).
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::Size;
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::MemoryAlignment;
1480 
1481 // gatherImplementation {{{2
1482 template <typename T, std::size_t N, typename VectorType, std::size_t M>
1483 template <class MT, class IT, int Scale>
1485  const Common::GatherArguments<MT, IT, Scale> &args)
1486 {
1487  data0.gather(Common::make_gather<Scale>(
1488  args.address, Split::lo(Common::Operations::gather(), args.indexes)));
1489  data1.gather(Common::make_gather<Scale>(
1490  args.address, Split::hi(Common::Operations::gather(), args.indexes)));
1491 }
// Masked gather: identical index routing as above, but each half only loads the
// lanes selected by its part of \p mask.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <class MT, class IT, int Scale>
inline void SimdArray<T, N, VectorType, M>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args, MaskArgument mask)
{
    data0.gather(Common::make_gather<Scale>(
                     args.address, Split::lo(Common::Operations::gather(), args.indexes)),
                 Split::lo(mask));
    data1.gather(Common::make_gather<Scale>(
                     args.address, Split::hi(Common::Operations::gather(), args.indexes)),
                 Split::hi(mask));
}
1504 
// scatterImplementation {{{2
// Scatters *this to \p mem via \p indexes, splitting the indexes per storage half.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
                                                                  IT &&indexes) const
{
    data0.scatter(mem, Split::lo(Common::Operations::gather(),
                                 indexes));  // don't forward indexes - it could move and
                                             // thus break the next line
    data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
}
// Masked scatter: only lanes selected by \p mask are written.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
                                                                  IT &&indexes, MaskArgument mask) const
{
    data0.scatter(mem, Split::lo(Common::Operations::gather(), indexes),
                  Split::lo(mask));  // don't forward indexes - it could move and
                                     // thus break the next line
    data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
                  Split::hi(mask));
}
1527 
// internal_data0/1 (SimdArray) {{{1
// Free accessors to the two storage halves (friends of SimdArray). The Vc_INTRINSIC
// marker is omitted for MSVC — presumably to work around a compiler issue; confirm
// against upstream history.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    const SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    const SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
1569 
// MSVC workaround for SimdArray(storage_type0, storage_type1) ctor{{{1
// MSVC sometimes stores x to data1. By first broadcasting 0 and then assigning y
// in the body the bug is supressed.
// Explicit specialization for the SSE double/8 case only.
#if defined Vc_MSVC && defined Vc_IMPL_SSE && !defined Vc_IMPL_AVX
template <>
Vc_INTRINSIC SimdArray<double, 8>::SimdArray(fixed_size_simd<double, 4> &&x,
                                             fixed_size_simd<double, 4> &&y)
    : data0(x), data1(0)
{
    data1 = y;
}
#endif
1582 
// binary operators {{{
namespace Detail
{
// Arithmetic/bitwise/shift operators on fixed_size_simd. Two overloads per
// operator: one for atomic (single-vector) fixed_size_simd operating on
// internal_data, one for the split case operating on both halves.
#define Vc_FIXED_OP(op)                                                                  \
    template <class T, int N,                                                            \
              class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type>   \
    Vc_INTRINSIC fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a,       \
                                                   const fixed_size_simd<T, N> &b)       \
    {                                                                                    \
        return {private_init, internal_data(a) op internal_data(b)};                     \
    }                                                                                    \
    template <class T, int N,                                                            \
              class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type,  \
              class = T>                                                                 \
    Vc_INTRINSIC fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a,       \
                                                   const fixed_size_simd<T, N> &b)       \
    {                                                                                    \
        return {internal_data0(a) op internal_data0(b),                                  \
                internal_data1(a) op internal_data1(b)};                                 \
    }
Vc_ALL_ARITHMETICS(Vc_FIXED_OP);
Vc_ALL_BINARY(Vc_FIXED_OP);
Vc_ALL_SHIFTS(Vc_FIXED_OP);
#undef Vc_FIXED_OP
// Comparison operators: same atomic/split structure, but returning a mask.
#define Vc_FIXED_OP(op)                                                                  \
    template <class T, int N,                                                            \
              class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type>   \
    Vc_INTRINSIC fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a,  \
                                                        const fixed_size_simd<T, N> &b)  \
    {                                                                                    \
        return {private_init, internal_data(a) op internal_data(b)};                     \
    }                                                                                    \
    template <class T, int N,                                                            \
              class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type,  \
              class = T>                                                                 \
    Vc_INTRINSIC fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a,  \
                                                        const fixed_size_simd<T, N> &b)  \
    {                                                                                    \
        return {internal_data0(a) op internal_data0(b),                                  \
                internal_data1(a) op internal_data1(b)};                                 \
    }
Vc_ALL_COMPARES(Vc_FIXED_OP);
#undef Vc_FIXED_OP
}  // namespace Detail
1627 
1628 // }}}
1629 // binary operators {{{1
namespace result_vector_type_internal
{
template <typename T>
using remove_cvref = typename std::remove_cv<typename std::remove_reference<T>::type>::type;

// True for integral types that may be 64 bit wide (sizeof > int, or long/unsigned
// long, whose width is platform-dependent).
template <typename T>
using is_integer_larger_than_int = std::integral_constant<
    bool, std::is_integral<T>::value &&(sizeof(T) > sizeof(int) ||
                                        std::is_same<T, long>::value ||
                                        std::is_same<T, unsigned long>::value)>;

// Primary template: N is taken from whichever operand is a SimdArray; the bool
// parameter encodes whether the operator should participate at all (see the inline
// conditions below).
template <
    typename L, typename R,
    std::size_t N = Traits::isSimdArray<L>::value ? Traits::simd_vector_size<L>::value
                                                  : Traits::simd_vector_size<R>::value,
    bool = (Traits::isSimdArray<L>::value ||
            Traits::isSimdArray<R>::value)  // one of the operands must be a SimdArray
           && !(Traits::is_fixed_size_simd<L>::value &&  // if both are fixed_size, use
                Traits::is_fixed_size_simd<R>::value)    // common/operators.h
           && ((std::is_arithmetic<remove_cvref<L>>::value &&  // one of the operands is a
                !is_integer_larger_than_int<remove_cvref<L>>::value) ||  // scalar type
               (std::is_arithmetic<remove_cvref<R>>::value &&
                !is_integer_larger_than_int<remove_cvref<R>>::value) ||
               // or one of the operands is Vector<T> with Vector<T>::size() ==
               // SimdArray::size()
               Traits::simd_vector_size<L>::value == Traits::simd_vector_size<R>::value)>
struct evaluate;

template <typename L, typename R, std::size_t N> struct evaluate<L, R, N, true>
{
private:
    using LScalar = Traits::entry_type_of<L>;
    using RScalar = Traits::entry_type_of<R>;

    template <bool B, typename T, typename F>
    using conditional = typename std::conditional<B, T, F>::type;

public:
    // In principle we want the exact same rules for SimdArray<T> ⨉ SimdArray<U> as the standard
    // defines for T ⨉ U. BUT: short ⨉ short returns int (because all integral types smaller than
    // int are promoted to int before any operation). This would imply that SIMD types with integral
    // types smaller than int are more or less useless - and you could use SimdArray<int> from the
    // start. Therefore we special-case those operations where the scalar type of both operands is
    // integral and smaller than int.
    // In addition, there is no generic support for 64-bit int SIMD types. Therefore
    // promotion to a 64-bit integral type (including `long` because it can potentially have 64
    // bits) also is not done. But if one of the operands is a scalar type that is larger than int
    // then the operator is disabled altogether. We do not want an implicit demotion.
    using type = fixed_size_simd<
        conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
                     sizeof(LScalar) < sizeof(int) &&
                     sizeof(RScalar) < sizeof(int)),
                    conditional<(sizeof(LScalar) == sizeof(RScalar)),
                                conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
                                conditional<(sizeof(LScalar) > sizeof(RScalar)), LScalar, RScalar>>,
                    decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
        N>;
};

}  // namespace result_vector_type_internal

// Result type of a mixed SimdArray/scalar (or SimdArray/Vector) binary operator.
template <typename L, typename R>
using result_vector_type = typename result_vector_type_internal::evaluate<L, R>::type;
1693 
// Mixed-operand binary operators: both operands are converted to the common
// result_vector_type and the computation is forwarded to Detail::operator op_.
// SFINAE via result_vector_type disables the overload when the combination is
// unsupported.
#define Vc_BINARY_OPERATORS_(op_)                                                        \
                                                                                         \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC result_vector_type<L, R> operator op_(L &&lhs, R &&rhs)                 \
    {                                                                                    \
        using Return = result_vector_type<L, R>;                                         \
        return Vc::Detail::operator op_(                                                 \
            static_cast<const Return &>(std::forward<L>(lhs)),                           \
            static_cast<const Return &>(std::forward<R>(rhs)));                          \
    }
Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_);
Vc_ALL_BINARY(Vc_BINARY_OPERATORS_);
#undef Vc_BINARY_OPERATORS_
// Mixed-operand comparison operators: same conversion scheme, returning the
// promoted type's mask.
#define Vc_BINARY_OPERATORS_(op_)                                                        \
                                                                                         \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op_(L &&lhs,      \
                                                                           R &&rhs)      \
    {                                                                                    \
        using Promote = result_vector_type<L, R>;                                        \
        return Promote(std::forward<L>(lhs)) op_ Promote(std::forward<R>(rhs));          \
    }
Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_);
#undef Vc_BINARY_OPERATORS_
1756 
// math functions {{{1
// Forwards a unary math function (abs, sin, ...) to the fromOperation dispatch,
// once for generic SimdArray and once for fixed_size_simd.
#define Vc_FORWARD_UNARY_OPERATOR(name_)                                                 \
                                                                                         \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x)                   \
    {                                                                                    \
        return fixed_size_simd<T, N>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    template <class T, int N>                                                            \
    fixed_size_simd<T, N> name_(const fixed_size_simd<T, N> &x)                          \
    {                                                                                    \
        return fixed_size_simd<T, N>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Same, for unary classification functions (isnan, ...) that return a mask.
#define Vc_FORWARD_UNARY_BOOL_OPERATOR(name_)                                            \
                                                                                         \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline fixed_size_simd_mask<T, N> name_(const SimdArray<T, N, V, M> &x)              \
    {                                                                                    \
        return fixed_size_simd_mask<T, N>::fromOperation(                                \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    template <class T, int N>                                                            \
    fixed_size_simd_mask<T, N> name_(const fixed_size_simd<T, N> &x)                     \
    {                                                                                    \
        return fixed_size_simd_mask<T, N>::fromOperation(                                \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Same, for binary math functions (atan2, min, max, ...).
#define Vc_FORWARD_BINARY_OPERATOR(name_)                                                \
                                                                                         \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x,                   \
                                       const SimdArray<T, N, V, M> &y)                   \
    {                                                                                    \
        return fixed_size_simd<T, N>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x, y);                                \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
1800 
1806 Vc_FORWARD_UNARY_OPERATOR(abs);
1807 Vc_FORWARD_UNARY_OPERATOR(asin);
1808 Vc_FORWARD_UNARY_OPERATOR(atan);
1809 Vc_FORWARD_BINARY_OPERATOR(atan2);
1810 Vc_FORWARD_UNARY_OPERATOR(ceil);
1811 Vc_FORWARD_BINARY_OPERATOR(copysign);
1812 Vc_FORWARD_UNARY_OPERATOR(cos);
1813 Vc_FORWARD_UNARY_OPERATOR(exp);
1814 Vc_FORWARD_UNARY_OPERATOR(exponent);
1815 Vc_FORWARD_UNARY_OPERATOR(floor);
1817 template <typename T, std::size_t N>
1819  const SimdArray<T, N> &c)
1820 {
1821  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_fma(), a, b, c);
1822 }
1823 Vc_FORWARD_UNARY_BOOL_OPERATOR(isfinite);
1824 Vc_FORWARD_UNARY_BOOL_OPERATOR(isinf);
1825 Vc_FORWARD_UNARY_BOOL_OPERATOR(isnan);
1826 Vc_FORWARD_UNARY_BOOL_OPERATOR(isnegative);
1828 template <typename T, std::size_t N>
1830 {
1831  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_frexp(), x, e);
1832 }
1834 template <typename T, std::size_t N>
1836 {
1837  return SimdArray<T, N>::fromOperation(Common::Operations::Forward_ldexp(), x, e);
1838 }
1839 Vc_FORWARD_UNARY_OPERATOR(log);
1840 Vc_FORWARD_UNARY_OPERATOR(log10);
1841 Vc_FORWARD_UNARY_OPERATOR(log2);
1842 Vc_FORWARD_UNARY_OPERATOR(reciprocal);
1843 Vc_FORWARD_UNARY_OPERATOR(round);
1844 Vc_FORWARD_UNARY_OPERATOR(rsqrt);
1845 Vc_FORWARD_UNARY_OPERATOR(sin);
1847 template <typename T, std::size_t N>
1849 {
1850  SimdArray<T, N>::callOperation(Common::Operations::Forward_sincos(), x, sin, cos);
1851 }
1852 Vc_FORWARD_UNARY_OPERATOR(sqrt);
1853 Vc_FORWARD_UNARY_OPERATOR(trunc);
1854 Vc_FORWARD_BINARY_OPERATOR(min);
1855 Vc_FORWARD_BINARY_OPERATOR(max);
1857 #undef Vc_FORWARD_UNARY_OPERATOR
1858 #undef Vc_FORWARD_UNARY_BOOL_OPERATOR
1859 #undef Vc_FORWARD_BINARY_OPERATOR
1860 
// simd_cast {{{1
// MSVC needs distinct dummy default parameters to tell otherwise-identical
// simd_cast overloads apart; all other compilers get empty expansions.
#ifdef Vc_MSVC
#define Vc_DUMMY_ARG0 , int = 0
#define Vc_DUMMY_ARG1 , long = 0
#define Vc_DUMMY_ARG2 , short = 0
#define Vc_DUMMY_ARG3 , char = '0'
#define Vc_DUMMY_ARG4 , unsigned = 0u
#define Vc_DUMMY_ARG5 , unsigned short = 0u
#else
#define Vc_DUMMY_ARG0
#define Vc_DUMMY_ARG1
#define Vc_DUMMY_ARG2
#define Vc_DUMMY_ARG3
#define Vc_DUMMY_ARG4
#define Vc_DUMMY_ARG5
#endif  // Vc_MSVC
1877 
// simd_cast_impl_smaller_input {{{2
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
// Recursive case: cast the leading arguments, then append the N entries of `last`
// behind them (each input contributes N scalars to the Return vector).
template <typename Return, std::size_t N, typename T, typename... From>
Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return>
simd_cast_impl_smaller_input(const From &... xs, const T &last)
{
    Return r = simd_cast<Return>(xs...);
    for (size_t i = 0; i < N; ++i) {
        r[i + N * sizeof...(From)] = static_cast<typename Return::EntryType>(last[i]);
    }
    return r;
}
1892 template <typename Return, std::size_t N, typename T>
1893 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(const T &last)
1894 {
1895  Return r = Return();
1896  for (size_t i = 0; i < N; ++i) {
1897  r[i] = static_cast<typename Return::EntryType>(last[i]);
1898  }
1899  return r;
1900 }
// Recursive case for inputs wider than the Return slot: cast the leading arguments,
// then fill the remaining Return entries from the start of `last`.
template <typename Return, std::size_t N, typename T, typename... From>
Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
    const From &... xs, const T &last)
{
    Return r = simd_cast<Return>(xs...);
    for (size_t i = N * sizeof...(From); i < Return::Size; ++i) {
        r[i] = static_cast<typename Return::EntryType>(last[i - N * sizeof...(From)]);
    }
    return r;
}
1911 template <typename Return, std::size_t N, typename T>
1912 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(const T &last)
1913 {
1914  Return r = Return();
1915  for (size_t i = 0; i < Return::size(); ++i) {
1916  r[i] = static_cast<typename Return::EntryType>(last[i]);
1917  }
1918  return r;
1919 }
1920 
// simd_cast_without_last (declaration) {{{2
// Casts all arguments except the trailing one (definition elsewhere).
template <typename Return, typename T, typename... From>
Vc_INTRINSIC_L Vc_CONST_L Return
    simd_cast_without_last(const From &... xs, const T &) Vc_INTRINSIC_R Vc_CONST_R;

// are_all_types_equal {{{2
// True iff every type in the pack is the same type (single-type pack is trivially
// true; otherwise pairwise-adjacent equality is folded recursively).
template <typename... Ts> struct are_all_types_equal;
template <typename T>
struct are_all_types_equal<T> : public std::integral_constant<bool, true>
{
};
template <typename T0, typename T1, typename... Ts>
struct are_all_types_equal<T0, T1, Ts...>
    : public std::integral_constant<
          bool, std::is_same<T0, T1>::value && are_all_types_equal<T1, Ts...>::value>
{
};

// simd_cast_interleaved_argument_order (declarations) {{{2
// Casts with the two argument groups a... and b... interleaved (a0, b0, a1, b1, ...).
template <typename Return, typename... Ts>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b);
1962 
// simd_cast_with_offset (declarations and one impl) {{{2
// offset == 0 {{{3
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
    simd_cast_with_offset(const From &x, const Froms &... xs);
// offset > 0 && offset divisible by Return::Size {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
    simd_cast_with_offset(const From &x);
// offset > 0 && offset NOT divisible && Return is non-atomic simd(mask)array {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 !Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 !Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x);
// offset > 0 && offset NOT divisible && Return is atomic simd(mask)array {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x);
// offset > first argument (drops first arg) {{{3
// Recursion step: the first argument lies entirely below the requested offset, so
// drop it and reduce the offset by its size.
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
    simd_cast_with_offset(const From &, const Froms &... xs)
{
    return simd_cast_with_offset<Return, offset - From::Size>(xs...);
}

// offset > first and only argument (returns Zero) {{{3
// Termination: nothing remains beyond the offset — return a zero-initialized Return.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
    const From &)
{
    return Return(0);
}
2010 
// first_type_of {{{2
/// Implementation detail: peels the leading type off a parameter pack.
template <typename First, typename... Rest> struct first_type_of_impl
{
    using type = First;
};
/// Convenience alias for the first type in the pack \p Ts.
template <typename... Ts> using first_type_of = typename first_type_of_impl<Ts...>::type;
2017 
// simd_cast_drop_arguments (declarations) {{{2
// Casts as many of the leading inputs as the Return type can hold and ignores
// the trailing surplus arguments. Definitions follow later in this file.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
// All inputs needed: they supply fewer than Return::Size entries in total
// (the pack precedes the last parameter, so `x` binds the final argument).
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
    simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
// Too many inputs: the last (unnamed) argument is dropped and the rest retried.
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From);
// Two-argument variant of the above, split out for the ICC workaround.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From);
2040 
namespace
{
// NOTE(review): an unnamed namespace in a header gives every translation unit
// its own copy of these helpers. Harmless for this debug-only shim, but worth
// confirming against the project's conventions.
#ifdef Vc_DEBUG_SIMD_CAST
// Sink whose braced-init-list argument forces left-to-right evaluation of the
// streaming expressions expanded from the pack below.
void debugDoNothing(const std::initializer_list<void *> &) {}
// Debug build: prints "prefix arg0, arg1, ... suffix" to std::cerr.
template <typename T0, typename... Ts>
inline void vc_debug_(const char *prefix, const char *suffix, const T0 &arg0,
                      const Ts &... args)
{
    std::cerr << prefix << arg0;
    debugDoNothing({&(std::cerr << ", " << args)...});
    std::cerr << suffix;
}
#else
// Release build: accepts and discards everything; compiles away entirely.
template <typename T0, typename... Ts>
Vc_INTRINSIC void vc_debug_(const char *, const char *, const T0 &, const Ts &...)
{
}
#endif
}  // unnamed namespace
2060 
2061 // is_less trait{{{2
2062 template <size_t A, size_t B>
2063 struct is_less : public std::integral_constant<bool, (A < B)> {
2064 };
2065 
// is_power_of_2 trait{{{2
/// Compile-time check whether N is a power of two.
/// The classic expression ((N - 1) & N) == 0 alone also classifies 0 as a
/// power of two (0 - 1 wraps to all-ones, which ANDed with 0 is 0); the
/// explicit N != 0 term excludes that degenerate case. All uses in this file
/// instantiate it with N >= 1, so existing behavior is unchanged.
template <size_t N>
struct is_power_of_2
    : public std::integral_constant<bool, (N != 0 && ((N - 1) & N) == 0)> {
};
2070 
// simd_cast<T>(xs...) to SimdArray/-mask {{{2
// Each expansion defines four simd_cast overloads converting one or more
// native vectors/masks into a Simd(Mask)Array. The enable_if conditions are
// mutually exclusive, so exactly one overload participates per call:
//  {1} atomic Return, inputs do not overfill it -> cast all into the storage
//  {2} atomic Return, too many inputs           -> drop the last and retry
//  {3} split Return, inputs reach past its left half -> fill both halves
//  {4} split Return, inputs fit in its left half     -> right half is zero
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    /* {1} atomic Return, inputs do not overfill it */                                   \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&     \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value &&                      \
         !detail::is_fixed_size_abi<A>::value),                                          \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{1}(", ")\n", x, xs...);                                     \
        return {private_init, simd_cast<typename Return::storage_type>(x, xs...)};       \
    }                                                                                    \
    /* {2} atomic Return, too many inputs: drop the last one and recurse */              \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         !is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&    \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value &&                      \
         !detail::is_fixed_size_abi<A>::value),                                          \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{2}(", ")\n", x, xs...);                                     \
        return {simd_cast_without_last<Return, NativeType_<T, A>, Froms...>(x, xs...)}; \
    }                                                                                    \
    /* {3} non-atomic Return, inputs extend past its left half */                        \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   is_less<Common::left_size<Return::Size>(),                            \
                           NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&    \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value &&            \
                   !detail::is_fixed_size_abi<A>::value),                                \
                  Return>                                                                \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{3}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast_drop_arguments<R0, Froms...>(x, xs...),                        \
                simd_cast_with_offset<R1, R0::Size>(x, xs...)};                          \
    }                                                                                    \
    /* {4} non-atomic Return, inputs fit in its left half; right half = 0 */             \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   !is_less<Common::left_size<Return::Size>(),                           \
                            NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&   \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value &&            \
                   !detail::is_fixed_size_abi<A>::value),                                \
                  Return>                                                                \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{4}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast<R0>(x, xs...), R1(0)};                                         \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2135 
// simd_cast<SimdArray/-mask, offset>(V) {{{2
// Conversions from a single native vector/mask into a Simd(Mask)Array where
// the non-type parameter `offset` selects which Return::Size-sized chunk of
// the input is converted (reading starts at offset * Return::Size).
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    /* SIMD Vector/Mask to atomic SimdArray/simdmaskarray */                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<Traits::isAtomic##SimdArrayType_<Return>::value, Return>               \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG0)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x);                       \
        return {private_init, simd_cast<typename Return::storage_type, offset>(x)};      \
    }                                                                                    \
    /* both halves of Return array are extracted from argument */                        \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() <           \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
    simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG1)                                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x);                 \
        using R0 = typename Return::storage_type0;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        constexpr int entries_offset_right = entries_offset + R0::Size;                  \
        return {                                                                         \
            simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x),    \
            simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
                x)};                                                                     \
    }                                                                                    \
    /* SIMD Vector/Mask to non-atomic SimdArray/simdmaskarray */                         \
    /* right half of Return array is zero */                                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() >=          \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
    simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG2)                                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x);                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        return {simd_cast_with_offset<R0, entries_offset>(x), R1(0)};                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2188 
// simd_cast<T>(xs...) from SimdArray/-mask {{{2
// Conversions taking Simd(Mask)Array inputs. The overloads split on whether
// the input type is atomic (N == M, "indivisible"), bisectable (N a power of
// two), or odd-sized, and on whether the inputs over- or underfill Return.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
    /* indivisible SimdArrayType_ */                                                     \
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value &&    \
                   (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) &&       \
                   !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value),            \
                  Return>                                                                \
    simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...);                          \
        return simd_cast<Return>(internal_data(x0), internal_data(xs)...);               \
    }                                                                                    \
    /* indivisible SimdArrayType_ && can drop arguments from the end */                  \
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value &&    \
                   (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) &&     \
                   !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value),            \
                  Return>                                                                \
    simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{indivisible2}(", ")\n", x0, xs...);                         \
        return simd_cast_without_last<Return,                                            \
                                      typename SimdArrayType_<T, N, V, N>::storage_type, \
                                      typename From::storage_type...>(                   \
            internal_data(x0), internal_data(xs)...);                                    \
    }                                                                                    \
    /* bisectable SimdArrayType_ (N = 2^n) && never too large */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         !std::is_same<Return, SimdArrayType_<T, N, V, M>>::value &&                     \
         is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value),  \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{bisectable}(", ")\n", x0, xs...);                           \
        /* pass low halves first, then high halves; the helper re-interleaves */         \
        return simd_cast_interleaved_argument_order<                                     \
            Return, typename SimdArrayType_<T, N, V, M>::storage_type0,                  \
            typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)..., \
                                             internal_data1(x0), internal_data1(xs)...); \
    }                                                                                    \
    /* bisectable SimdArrayType_ (N = 2^n) && input so large that at least the last      \
     * input can be dropped */                                                           \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         !is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{bisectable2}(", ")\n", x0, xs...);                          \
        return simd_cast_without_last<Return, SimdArrayType_<T, N, V, M>, From...>(      \
            x0, xs...);                                                                  \
    }                                                                                    \
    /* remaining SimdArrayType_ input never larger (N != 2^n) */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         N * (1 + sizeof...(From)) <= Return::Size && !is_power_of_2<N>::value),         \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{remaining}(", ")\n", x0, xs...);                            \
        return simd_cast_impl_smaller_input<Return, N, SimdArrayType_<T, N, V, M>,       \
                                            From...>(x0, xs...);                         \
    }                                                                                    \
    /* remaining SimdArrayType_ input larger (N != 2^n) */                               \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         N * (1 + sizeof...(From)) > Return::Size && !is_power_of_2<N>::value),          \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{remaining2}(", ")\n", x0, xs...);                           \
        return simd_cast_impl_larger_input<Return, N, SimdArrayType_<T, N, V, M>,        \
                                           From...>(x0, xs...);                          \
    }                                                                                    \
    /* a single bisectable SimdArrayType_ (N = 2^n) too large */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && N >= 2 * Return::Size && is_power_of_2<N>::value), Return>  \
        simd_cast(const SimdArrayType_<T, N, V, M> &x)                                   \
    {                                                                                    \
        vc_debug_("simd_cast{single bisectable}(", ")\n", x);                            \
        return simd_cast<Return>(internal_data0(x));                                     \
    }                                                                                    \
    /* a single bisectable input slightly too large: both halves are needed */           \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
    Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size &&                       \
                                     N < 2 * Return::Size && is_power_of_2<N>::value),   \
                                    Return>                                              \
    simd_cast(const SimdArrayType_<T, N, V, M> &x)                                       \
    {                                                                                    \
        vc_debug_("simd_cast{single bisectable2}(", ")\n", x);                           \
        return simd_cast<Return>(internal_data0(x), internal_data1(x));                  \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
/// simd_cast of fixed_size_simd arguments: strip the fixed_size wrapper by
/// casting to the SimdArray representation and reuse the overloads above. The
/// enable_if removes this overload for identity casts (Return == input type).
template <class Return, class T, int N, class... Ts,
          class = enable_if<!std::is_same<Return, fixed_size_simd<T, N>>::value>>
Vc_INTRINSIC Return simd_cast(const fixed_size_simd<T, N> &x, const Ts &... xs)
{
    // The static_cast assumes fixed_size_simd<T, N> is-a SimdArray<T, N>
    // (declared elsewhere in the library) — TODO confirm against its definition.
    return simd_cast<Return>(static_cast<const SimdArray<T, N> &>(x),
                             static_cast<const SimdArray<T, N> &>(xs)...);
}
/// Mask counterpart of the overload above: forwards fixed_size_simd_mask
/// arguments as SimdMaskArray so the mask overloads apply.
template <class Return, class T, int N, class... Ts,
          class = enable_if<!std::is_same<Return, fixed_size_simd_mask<T, N>>::value>>
Vc_INTRINSIC Return simd_cast(const fixed_size_simd_mask<T, N> &x, const Ts &... xs)
{
    // Same is-a assumption as for fixed_size_simd above — TODO confirm.
    return simd_cast<Return>(static_cast<const SimdMaskArray<T, N> &>(x),
                             static_cast<const SimdMaskArray<T, N> &>(xs)...);
}
2311 
// simd_cast<T, offset>(SimdArray/-mask) {{{2
// Extracts the offset-th Return::Size-sized chunk of a Simd(Mask)Array. The
// overloads recurse into the left/right storage members until the requested
// chunk lies entirely within one member, falling back to a scalar copy loop
// when the chunk straddles the split.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
    /* offset == 0 is like without offset */                                             \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG0)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x);                          \
        return simd_cast<Return>(x);                                                     \
    }                                                                                    \
    /* forward to V */                                                                   \
    template <typename Return, int offset, typename T, std::size_t N, typename V>        \
    Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, N> &x Vc_DUMMY_ARG1)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x);                      \
        return simd_cast<Return, offset>(internal_data(x));                              \
    }                                                                                    \
    /* convert from right member of SimdArray */                                         \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size == 0),           \
                  Return>                                                                \
    simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG2)                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right}(", ")\n", offset, x);                        \
        return simd_cast<Return, offset - Common::left_size<N>() / Return::Size>(        \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* same as above except for odd cases where offset * Return::Size doesn't fit the    \
     * left side of the SimdArray */                                                     \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size != 0),           \
                  Return>                                                                \
    simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG3)                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x);                 \
        return simd_cast_with_offset<Return,                                             \
                                     offset * Return::Size - Common::left_size<N>()>(    \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* convert from left member of SimdArray */                                          \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && /*offset * Return::Size < Common::left_size<N>() &&*/                 \
         offset != 0 && (offset + 1) * Return::Size <= Common::left_size<N>()),          \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG4)                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, left}(", ")\n", offset, x);                         \
        return simd_cast<Return, offset>(internal_data0(x));                             \
    }                                                                                    \
    /* fallback to copying scalars: the chunk straddles the left/right split */          \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && (offset * Return::Size < Common::left_size<N>()) &&         \
                   offset != 0 && (offset + 1) * Return::Size > Common::left_size<N>()), \
                  Return>                                                                \
    simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG5)                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x);                 \
        using R = typename Return::EntryType;                                            \
        Return r = Return(0);                                                            \
        for (std::size_t i = offset * Return::Size;                                      \
             i < std::min(N, (offset + 1) * Return::Size); ++i) {                        \
            r[i - offset * Return::Size] = static_cast<R>(x[i]);                         \
        }                                                                                \
        return r;                                                                        \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
// simd_cast_drop_arguments (definitions) {{{2
/// Recursion terminator: a single remaining input is simply cast.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x)
{
    return simd_cast<Return>(x);
}
/// All inputs still needed (they supply fewer than Return::Size entries):
/// forward everything to simd_cast. The pack precedes the final parameter,
/// so `x` binds the last argument.
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
    simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
{
    return simd_cast<Return>(xs..., x);
}
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
/// Too many inputs: discard the trailing (unnamed) argument and retry with the
/// rest until the remaining inputs fit into Return.
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From)
{
    return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
}
/// Two-argument variant of the above (split out for the ICC ICE workaround).
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From)
{
    return simd_cast_drop_arguments<Return>(x);
}
2427 
// simd_cast_with_offset (definitions) {{{2
/// offset divisible by Return::Size: delegate to the offset-taking simd_cast.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
    simd_cast_with_offset(const From &x)
{
    return simd_cast<Return, offset / Return::Size>(x);
}
/// Non-atomic Return: fill both storage halves, the right half starting
/// R0::Size entries further into the input.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 !Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 !Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x)
{
    using R0 = typename Return::storage_type0;
    using R1 = typename Return::storage_type1;
    return {simd_cast_with_offset<R0, offset>(x),
            simd_cast_with_offset<R1, offset + R0::Size>(x)};
}
/// Atomic Return with a non-divisible offset: shift the input so the wanted
/// entries become aligned to a Return::Size boundary, then cast that chunk.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x)
{
    return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
}
/// offset == 0: equivalent to a plain simd_cast of all inputs.
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
    simd_cast_with_offset(const From &x, const Froms &... xs)
{
    return simd_cast<Return>(x, xs...);
}
2470 
// simd_cast_without_last (definition) {{{2
/// Casts every argument except the trailing one. Callers supply Return, T and
/// From... explicitly, so the pack binds all arguments but the last, which is
/// deduced as T and discarded.
template <typename Return, typename T, typename... From>
Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(const From &... xs, const T &)
{
    return simd_cast<Return>(xs...);
}
2477 
// simd_cast_interleaved_argument_order (definitions) {{{2

#ifdef Vc_MSVC
// MSVC doesn't see that the Ts pack below can be empty and thus complains when extract_interleaved
// is called with only 2 arguments. These overloads here are *INCORRECT standard C++*, but they make
// MSVC do the right thing.
/// I == 0 with exactly two arguments: the first one.
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0, const T0 &)
{
    return a0;
}
/// I == 1 with exactly two arguments: the second one.
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &, const T0 &b0)
{
    return b0;
}
#endif  // Vc_MSVC
2495 
/// extract_interleaved<I>(a0, a..., b0, b...) returns the I-th element of the
/// interleaved sequence a0, b0, a1, b1, …: even I selects from the a-pack,
/// odd I from the b-pack. I == 0: the first a.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0,
                                                                  const Ts &...,
                                                                  const T0 &,
                                                                  const Ts &...)
{
    return a0;
}
/// I == 1: the first b.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &,
                                                                  const Ts &...,
                                                                  const T0 &b0,
                                                                  const Ts &...)
{
    return b0;
}
/// I > 1: drop a0 and b0 and recurse with I reduced by the two dropped values.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(const T0 &,
                                                                 const Ts &... a,
                                                                 const T0 &,
                                                                 const Ts &... b)
{
    return extract_interleaved<I - 2, Ts...>(a..., b...);
}
/// Work horse: expands extract_interleaved for every index in Indexes, handing
/// simd_cast the arguments reordered as a0, b0, a1, b1, … .
template <typename Return, typename... Ts, std::size_t... Indexes>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>, const Ts &... a,
                                           const Ts &... b)
{
    return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
}
/// Public entry point: builds the index sequence 0 … 2*sizeof...(Ts)-1 and
/// dispatches to the worker above.
template <typename Return, typename... Ts>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b)
{
    using seq = make_index_sequence<sizeof...(Ts) * 2>;
    return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...);
}
2540 
// conditional_assign {{{1
// Generates one conditional_assign overload per compound-assignment operator:
// applies `lhs op_ rhs` only on the lanes selected by mask, via the
// write-masked proxy returned from lhs(mask).
#define Vc_CONDITIONAL_ASSIGN(name_, op_)                                                \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M,  \
              typename U>                                                                \
    Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign(               \
        SimdArray<T, N, V, VN> &lhs, M &&mask, U &&rhs)                                  \
    {                                                                                    \
        lhs(mask) op_ rhs;                                                               \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(          Assign,  =);
Vc_CONDITIONAL_ASSIGN(      PlusAssign, +=);
Vc_CONDITIONAL_ASSIGN(     MinusAssign, -=);
Vc_CONDITIONAL_ASSIGN(  MultiplyAssign, *=);
Vc_CONDITIONAL_ASSIGN(    DivideAssign, /=);
Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
Vc_CONDITIONAL_ASSIGN(       XorAssign, ^=);
Vc_CONDITIONAL_ASSIGN(       AndAssign, &=);
Vc_CONDITIONAL_ASSIGN(        OrAssign, |=);
Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
#undef Vc_CONDITIONAL_ASSIGN
2563 
// Same idea for increment/decrement: evaluates expr_ (a masked ++/-- on lhs)
// and returns its result.
#define Vc_CONDITIONAL_ASSIGN(name_, expr_)                                              \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M>  \
    Vc_INTRINSIC enable_if<O == Operator::name_, SimdArray<T, N, V, VN>>                 \
    conditional_assign(SimdArray<T, N, V, VN> &lhs, M &&mask)                            \
    {                                                                                    \
        return expr_;                                                                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
#undef Vc_CONDITIONAL_ASSIGN
// transpose_impl {{{1
namespace Common
{
// 4x4 transpose of atomic SimdArrays (storage == one native vector of size
// N): unwrap the native vectors and delegate to the native transpose_impl.
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, N> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
                         SimdArray<T, N, V, N>, SimdArray<T, N, V, N>> &proxy)
{
    V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
                            &internal_data(*r[2]), &internal_data(*r[3])};
    transpose_impl(TransposeTag<4, 4>(), &r2[0],
                   TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
                                              internal_data(std::get<1>(proxy.in)),
                                              internal_data(std::get<2>(proxy.in)),
                                              internal_data(std::get<3>(proxy.in))});
}

// 2x4 step: scatters the two elements of each of the four 2-wide inputs into
// the two 4-wide outputs *r[0] ("lo", element 0 of every input) and *r[1]
// ("hi", element 1 of every input) via direct member assignments.
template <typename T, typename V>
inline void transpose_impl(
    TransposeTag<2, 4>, SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
                         SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>> &proxy)
{
    auto &lo = *r[0];
    auto &hi = *r[1];
    internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
    internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
    internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
    internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
    internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
    internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
    internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
    internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
}

// 4x4 transpose of single-element SimdArrays; body matches the atomic
// overload above — presumably present to disambiguate overload resolution
// when both <T, N, V, N> and <T, N, V, 1> would match N == 1 (verify).
template <typename T, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, 1, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>,
                         SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>> &proxy)
{
    V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
                            &internal_data(*r[2]), &internal_data(*r[3])};
    transpose_impl(TransposeTag<4, 4>(), &r2[0],
                   TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
                                              internal_data(std::get<1>(proxy.in)),
                                              internal_data(std::get<2>(proxy.in)),
                                              internal_data(std::get<3>(proxy.in))});
}

// 4x4 transpose of scalar-backed SimdArrays (VSize == 1): split into two 2x4
// transposes, one per storage half of the inputs.
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
                         SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>> &proxy)
{
    SimdArray<T, N, V, 1> *Vc_RESTRICT r0[4 / 2] = {r[0], r[1]};
    SimdArray<T, N, V, 1> *Vc_RESTRICT r1[4 / 2] = {r[2], r[3]};
    using H = SimdArray<T, 2>;
    transpose_impl(TransposeTag<2, 4>(), &r0[0],
                   TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
                                              internal_data0(std::get<1>(proxy.in)),
                                              internal_data0(std::get<2>(proxy.in)),
                                              internal_data0(std::get<3>(proxy.in))});
    transpose_impl(TransposeTag<2, 4>(), &r1[0],
                   TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
                                              internal_data1(std::get<1>(proxy.in)),
                                              internal_data1(std::get<2>(proxy.in)),
                                              internal_data1(std::get<3>(proxy.in))});
}

/* TODO:
template <typename T, std::size_t N, typename V, std::size_t VSize>
inline enable_if<(N > VSize), void> transpose_impl(
    std::array<SimdArray<T, N, V, VSize> * Vc_RESTRICT, 4> & r,
    const TransposeProxy<SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>,
                         SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>> &proxy)
{
    typedef SimdArray<T, N, V, VSize> SA;
    std::array<typename SA::storage_type0 * Vc_RESTRICT, 4> r0 = {
        {&internal_data0(*r[0]), &internal_data0(*r[1]), &internal_data0(*r[2]),
         &internal_data0(*r[3])}};
    transpose_impl(
        r0, TransposeProxy<typename SA::storage_type0, typename SA::storage_type0,
                           typename SA::storage_type0, typename SA::storage_type0>{
                internal_data0(std::get<0>(proxy.in)),
                internal_data0(std::get<1>(proxy.in)),
                internal_data0(std::get<2>(proxy.in)),
                internal_data0(std::get<3>(proxy.in))});

    std::array<typename SA::storage_type1 * Vc_RESTRICT, 4> r1 = {
        {&internal_data1(*r[0]), &internal_data1(*r[1]), &internal_data1(*r[2]),
         &internal_data1(*r[3])}};
    transpose_impl(
        r1, TransposeProxy<typename SA::storage_type1, typename SA::storage_type1,
                           typename SA::storage_type1, typename SA::storage_type1>{
                internal_data1(std::get<0>(proxy.in)),
                internal_data1(std::get<1>(proxy.in)),
                internal_data1(std::get<2>(proxy.in)),
                internal_data1(std::get<3>(proxy.in))});
}
*/
}  // namespace Common
2681 
2682 // }}}1
namespace Detail
{
// InterleaveImpl for SimdArrays {{{
// atomic {{{1
/// (De)interleave support for atomic SimdArrays (N == native vector width):
/// unwraps each SimdArray to its internal native vector and delegates to the
/// native vector's InterleaveImpl.
template <class T, size_t N, class V, size_t VSizeof>
struct InterleaveImpl<SimdArray<T, N, V, N>, N, VSizeof> {
    /// Forwards to the native interleave with the unwrapped vectors vv... .
    template <class I, class... VV>
    static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv)
    {
        InterleaveImpl<V, N, VSizeof>::interleave(data, i, internal_data(vv)...);
    }
    /// Forwards to the native deinterleave, filling the unwrapped vv... .
    template <class I, class... VV>
    static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... vv)
    {
        InterleaveImpl<V, N, VSizeof>::deinterleave(data, i, internal_data(vv)...);
    }
};

// generic (TODO) {{{1
/*
template <class T, size_t N, class V, size_t Wt, size_t VSizeof>
struct InterleaveImpl<SimdArray<T, N, V, Wt>, N, VSizeof> {
    using SA = SimdArray<T, N, V, Wt>;
    using SA0 = typename SA::storage_type0;
    using SA1 = typename SA::storage_type1;

    template <class I, class... VV>
    static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv)
    {
        InterleaveImpl<SA0, SA0::size(), sizeof(SA0)>::interleave(
            data, i,  // i needs to be split
            internal_data0(vv)...);
        InterleaveImpl<SA1, SA1::size(), sizeof(SA1)>::interleave(
            data,  // how far to advance data?
            i,     // i needs to be split
            internal_data1(vv)...);
    }
    template <class I, class... VV>
    static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... vv)
    {
        InterleaveImpl<V, N, VSizeof>::deinterleave(data, i, internal_data(vv)...);
    }
};
*/
}  // namespace Detail
2728 // }}}
2730 
2731 } // namespace Vc_VERSIONED_NAMESPACE
2732 
2733 // numeric_limits {{{1
2734 namespace std
2735 {
2736 template <typename T, size_t N, typename V, size_t VN>
2737 struct numeric_limits<Vc::SimdArray<T, N, V, VN>> : public numeric_limits<T> {
2738 private:
2739  using R = Vc::SimdArray<T, N, V, VN>;
2740 
2741 public:
2742  static Vc_ALWAYS_INLINE Vc_CONST R max() noexcept { return numeric_limits<T>::max(); }
2743  static Vc_ALWAYS_INLINE Vc_CONST R min() noexcept { return numeric_limits<T>::min(); }
2744  static Vc_ALWAYS_INLINE Vc_CONST R lowest() noexcept
2745  {
2746  return numeric_limits<T>::lowest();
2747  }
2748  static Vc_ALWAYS_INLINE Vc_CONST R epsilon() noexcept
2749  {
2750  return numeric_limits<T>::epsilon();
2751  }
2752  static Vc_ALWAYS_INLINE Vc_CONST R round_error() noexcept
2753  {
2754  return numeric_limits<T>::round_error();
2755  }
2756  static Vc_ALWAYS_INLINE Vc_CONST R infinity() noexcept
2757  {
2758  return numeric_limits<T>::infinity();
2759  }
2760  static Vc_ALWAYS_INLINE Vc_CONST R quiet_NaN() noexcept
2761  {
2762  return numeric_limits<T>::quiet_NaN();
2763  }
2764  static Vc_ALWAYS_INLINE Vc_CONST R signaling_NaN() noexcept
2765  {
2766  return numeric_limits<T>::signaling_NaN();
2767  }
2768  static Vc_ALWAYS_INLINE Vc_CONST R denorm_min() noexcept
2769  {
2770  return numeric_limits<T>::denorm_min();
2771  }
2772 };
2773 } // namespace std
2774 //}}}1
2775 
2776 #endif // VC_COMMON_SIMDARRAY_H_
2777 
2778 // vim: foldmethod=marker
The main SIMD mask class.
Definition: mask.h:42
Data-parallel arithmetic type with user-defined number of elements.
Definition: simdarray.h:617
static fixed_size_simd< T, N > One()
Returns a vector with the entries initialized to one.
Definition: simdarray.h:707
reference operator[](size_t i) noexcept
This operator can be used to modify scalar entries of the vector.
Definition: simdarray.h:1035
fixed_size_simd< T, N > reversed() const
Returns a vector with all components reversed.
Definition: simdarray.h:1339
SimdArray()=default
Construct a zero-initialized vector object.
static constexpr std::size_t size()
Returns N, the number of scalar components in an object of this type.
Definition: simdarray.h:675
static fixed_size_simd< T, N > generate(const G &gen)
Generate a vector object from return values of gen (static variant of fill).
Definition: simdarray.h:732
SimdArray(value_type a)
Broadcast Constructor.
Definition: simdarray.h:755
fixed_size_simd< T, N > partialSum() const
Returns a vector containing the sum of all entries with smaller index.
Definition: simdarray.h:1104
value_type operator[](size_t index) const noexcept
This operator can be used to read scalar entries of the vector.
Definition: simdarray.h:1042
fixed_size_simd< T, N > apply(F &&f) const
Call f on every entry of the vector and return the results as a new vector.
Definition: simdarray.h:1114
fixed_size_simd< T, N > shifted(int amount) const
Shift vector entries to the left by amount; shifting in zeros.
Definition: simdarray.h:1127
fixed_size_simd< T, N > rotated(int amount) const
Rotate vector entries to the left by amount.
Definition: simdarray.h:1256
Common::WriteMaskedVector< SimdArray, mask_type > operator()(const mask_type &mask)
Writemask the vector before an assignment.
Definition: simdarray.h:1050
fixed_size_simd< T, N > sorted() const
Return a sorted copy of the vector.
Definition: simdarray.h:1361
static fixed_size_simd< T, N > Zero()
Returns a vector with the entries initialized to zero.
Definition: simdarray.h:701
fixed_size_simd< T, N > apply(F &&f, const mask_type &k) const
As above, but skip the entries where mask is not set.
Definition: simdarray.h:1120
value_type EntryType
The type of the elements (i.e. T)
Definition: simdarray.h:684
fixed_size_simd< T, N > operator+() const
Returns a copy of itself.
Definition: simdarray.h:960
static fixed_size_simd< T, N > IndexesFromZero()
Returns a vector with the entries initialized to 0, 1, 2, 3, 4, 5, ...
Definition: simdarray.h:713
static fixed_size_simd< T, N > Random()
Returns a vector with pseudo-random entries.
Definition: simdarray.h:719
T value_type
The type of the elements (i.e. T)
Definition: simdarray.h:657
The main vector class for expressing data parallelism.
Definition: vector.h:126
Vector sorted() const
Return a sorted copy of the vector.
Vector reversed() const
Returns a vector with all components reversed.
fixed_size_simd< T, N > abs(const SimdArray< T, N, V, M > &x)
Applies the std:: abs function component-wise and concurrently.
Definition: simdarray.h:1806
fixed_size_simd< T, N > floor(const SimdArray< T, N, V, M > &x)
Applies the std:: floor function component-wise and concurrently.
Definition: simdarray.h:1815
fixed_size_simd< T, N > max(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std:: max function component-wise and concurrently.
Definition: simdarray.h:1855
fixed_size_simd< T, N > min(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std:: min function component-wise and concurrently.
Definition: simdarray.h:1854
fixed_size_simd< T, N > asin(const SimdArray< T, N, V, M > &x)
Applies the std:: asin function component-wise and concurrently.
Definition: simdarray.h:1807
fixed_size_simd< T, N > atan2(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std:: atan2 function component-wise and concurrently.
Definition: simdarray.h:1809
fixed_size_simd< T, N > trunc(const SimdArray< T, N, V, M > &x)
Applies the std:: trunc function component-wise and concurrently.
Definition: simdarray.h:1853
result_vector_type< L, R > operator+(L &&lhs, R &&rhs)
Applies + component-wise and concurrently.
Definition: simdarray.h:1722
fixed_size_simd< T, N > reciprocal(const SimdArray< T, N, V, M > &x)
Computes the reciprocal (1/x) component-wise and concurrently (Vc function; not part of std::).
Definition: simdarray.h:1842
fixed_size_simd< T, N > ceil(const SimdArray< T, N, V, M > &x)
Applies the std:: ceil function component-wise and concurrently.
Definition: simdarray.h:1810
fixed_size_simd< T, N > exp(const SimdArray< T, N, V, M > &x)
Applies the std:: exp function component-wise and concurrently.
Definition: simdarray.h:1813
fixed_size_simd< T, N > log10(const SimdArray< T, N, V, M > &x)
Applies the std:: log10 function component-wise and concurrently.
Definition: simdarray.h:1840
fixed_size_simd< T, N > log2(const SimdArray< T, N, V, M > &x)
Applies the std:: log2 function component-wise and concurrently.
Definition: simdarray.h:1841
fixed_size_simd< T, N > rsqrt(const SimdArray< T, N, V, M > &x)
Computes the reciprocal square root (1/sqrt(x)) component-wise and concurrently (Vc function; not part of std::).
Definition: simdarray.h:1844
SimdArray< T, N > fma(const SimdArray< T, N > &a, const SimdArray< T, N > &b, const SimdArray< T, N > &c)
Applies the std::fma function component-wise and concurrently.
Definition: simdarray.h:1818
fixed_size_simd_mask< T, N > isnan(const SimdArray< T, N, V, M > &x)
Applies the std:: isnan function component-wise and concurrently.
Definition: simdarray.h:1825
fixed_size_simd< T, N > exponent(const SimdArray< T, N, V, M > &x)
Applies the std:: exponent function component-wise and concurrently.
Definition: simdarray.h:1814
fixed_size_simd< T, N > sin(const SimdArray< T, N, V, M > &x)
Applies the std:: sin function component-wise and concurrently.
Definition: simdarray.h:1845
SimdArray< T, N > frexp(const SimdArray< T, N > &x, SimdArray< int, N > *e)
Applies the std::frexp function component-wise and concurrently.
Definition: simdarray.h:1829
fixed_size_simd< T, N > sqrt(const SimdArray< T, N, V, M > &x)
Applies the std:: sqrt function component-wise and concurrently.
Definition: simdarray.h:1852
fixed_size_simd< T, N > atan(const SimdArray< T, N, V, M > &x)
Applies the std:: atan function component-wise and concurrently.
Definition: simdarray.h:1808
SimdArray< T, N > ldexp(const SimdArray< T, N > &x, const SimdArray< int, N > &e)
Applies the std::ldexp function component-wise and concurrently.
Definition: simdarray.h:1835
result_vector_type< L, R > operator-(L &&lhs, R &&rhs)
Applies - component-wise and concurrently.
Definition: simdarray.h:1722
fixed_size_simd_mask< T, N > isnegative(const SimdArray< T, N, V, M > &x)
Tests component-wise and concurrently whether each entry is negative (Vc function; not part of std::).
Definition: simdarray.h:1826
fixed_size_simd_mask< T, N > isinf(const SimdArray< T, N, V, M > &x)
Applies the std:: isinf function component-wise and concurrently.
Definition: simdarray.h:1824
fixed_size_simd_mask< T, N > isfinite(const SimdArray< T, N, V, M > &x)
Applies the std:: isfinite function component-wise and concurrently.
Definition: simdarray.h:1823
fixed_size_simd< T, N > log(const SimdArray< T, N, V, M > &x)
Applies the std:: log function component-wise and concurrently.
Definition: simdarray.h:1839
void sincos(const SimdArray< T, N > &x, SimdArray< T, N > *sin, SimdArray< T, N > *cos)
Determines sine and cosine concurrently and component-wise on x.
Definition: simdarray.h:1848
fixed_size_simd< T, N > cos(const SimdArray< T, N, V, M > &x)
Applies the std:: cos function component-wise and concurrently.
Definition: simdarray.h:1812
fixed_size_simd< T, N > round(const SimdArray< T, N, V, M > &x)
Applies the std:: round function component-wise and concurrently.
Definition: simdarray.h:1843
fixed_size_simd< T, N > copysign(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std:: copysign function component-wise and concurrently.
Definition: simdarray.h:1811
Adapter< S, T, N > shifted(const Adapter< S, T, N > &a, int shift)
Returns a new vectorized object where each entry is shifted by shift.
Definition: simdize.h:1069
constexpr VectorSpecialInitializerIndexesFromZero IndexesFromZero
The special object Vc::IndexesFromZero can be used to construct Vector objects initialized to values ...
Definition: types.h:91
constexpr VectorSpecialInitializerOne One
The special object Vc::One can be used to construct Vector and Mask objects initialized to one/true.
Definition: types.h:86
constexpr UnalignedTag Unaligned
Use this object for a flags parameter to request unaligned loads and stores.
constexpr AlignedTag Aligned
Use this object for a flags parameter to request aligned loads and stores.
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/fals...
Definition: types.h:81
std::pair< V, V > interleave(const V &a, const V &b)
Interleaves the entries from a and b into two vectors of the same type.
Definition: interleave.h:55
void deinterleave(V *a, V *b, const M *memory, A align)
Definition: deinterleave.h:76
constexpr std::size_t MemoryAlignment
Specifies the most conservative memory alignment necessary for aligned loads and stores of Vector typ...
Definition: vector.h:215
std::ostream & operator<<(std::ostream &out, const Vc::Vector< T, Abi > &v)
Prints the contents of a vector into a stream object.
Definition: IO:117
Vector Classes Namespace.
Definition: dox.h:585
UnalignedTag DefaultLoadTag
The default load tag type uses unaligned (non-streaming) loads.
To simd_cast(From &&x, enable_if< std::is_same< To, Traits::decay< From >>::value >=nullarg)
Casts the argument x from type From to type To.
Definition: simd_cast.h:52
void assign(SimdizeDetail::Adapter< S, T, N > &a, size_t i, const S &x)
Assigns one scalar object x to a SIMD slot at offset i in the simdized object a.
Definition: simdize.h:1221