Vc 1.4.5
SIMD Vector Classes for C++
 
Loading...
Searching...
No Matches
simdarray.h
1/* This file is part of the Vc library. {{{
2Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
3
4Redistribution and use in source and binary forms, with or without
5modification, are permitted provided that the following conditions are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 * Neither the names of contributing organizations nor the
12 names of its contributors may be used to endorse or promote products
13 derived from this software without specific prior written permission.
14
15THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
26}}}*/
27
28#ifndef VC_COMMON_SIMDARRAY_H_
29#define VC_COMMON_SIMDARRAY_H_
30
31//#define Vc_DEBUG_SIMD_CAST 1
32//#define Vc_DEBUG_SORTED 1
33//#include "../IO"
34
35#include <array>
36#include <limits>
37
38#include "writemaskedvector.h"
39#include "simdarrayhelper.h"
40#include "simdmaskarray.h"
41#include "utility.h"
42#include "interleave.h"
43#include "indexsequence.h"
44#include "transpose.h"
45#include "macros.h"
46
47namespace Vc_VERSIONED_NAMESPACE
48{
49// select_best_vector_type {{{
namespace Common
{
// Chooses the widest available native vector type whose lane count does not
// exceed N. Candidates are tried front to back; the first one with
// T::Size <= N is selected.
template <std::size_t N, class... Candidates> struct select_best_vector_type_impl;
// last candidate; this one must work; assume it does:
template <std::size_t N, class T> struct select_best_vector_type_impl<N, T> {
    using type = T;
};
// check the next candidate; use it if N >= T::size(); recurse otherwise:
template <std::size_t N, class T, class... Candidates>
struct select_best_vector_type_impl<N, T, Candidates...> {
    using type = typename std::conditional<
        (N < T::Size), typename select_best_vector_type_impl<N, Candidates...>::type,
        T>::type;
};
// Entry point: the candidate list is ordered widest first and depends on the
// instruction-set macros the build defines (AVX2 > AVX > SSE > Scalar).
template <class T, std::size_t N>
struct select_best_vector_type : select_best_vector_type_impl<N,
#ifdef Vc_IMPL_AVX2
                                                              Vc::AVX2::Vector<T>,
#elif defined Vc_IMPL_AVX
                                                              Vc::AVX::Vector<T>,
#endif
#ifdef Vc_IMPL_SSE
                                                              Vc::SSE::Vector<T>,
#endif
                                                              Vc::Scalar::Vector<T>> {
};
}  // namespace Common
84// }}}
85// internal namespace (product & sum helper) {{{1
86namespace internal
87{
88template <typename T> T Vc_INTRINSIC Vc_PURE product_helper_(const T &l, const T &r) { return l * r; }
89template <typename T> T Vc_INTRINSIC Vc_PURE sum_helper_(const T &l, const T &r) { return l + r; }
90} // namespace internal
91
// min & max declarations {{{1
/// Element-wise minimum of two equally sized SimdArrays (defined later in this file).
template <typename T, std::size_t N, typename V, std::size_t M>
inline fixed_size_simd<T, N> min(const SimdArray<T, N, V, M> &x,
                                 const SimdArray<T, N, V, M> &y);
/// Element-wise maximum of two equally sized SimdArrays (defined later in this file).
template <typename T, std::size_t N, typename V, std::size_t M>
inline fixed_size_simd<T, N> max(const SimdArray<T, N, V, M> &x,
                                 const SimdArray<T, N, V, M> &y);
99
100// SimdArray class {{{1
103
104// atomic SimdArray {{{1
105#define Vc_CURRENT_CLASS_NAME SimdArray
// \internal
// "Atomic" specialization of SimdArray: used when the requested element count N
// equals the lane count of the best native vector type for T, so the whole
// object is stored in a single native SIMD vector and every operation forwards
// to that vector 1:1 (no recursive splitting).
template <typename T, std::size_t N, typename VectorType_>
class SimdArray<T, N, VectorType_, N>
{
    // Only the element types Vc implements natively are usable.
    static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
                      std::is_same<T, int32_t>::value ||
                      std::is_same<T, uint32_t>::value ||
                      std::is_same<T, int16_t>::value ||
                      std::is_same<T, uint16_t>::value,
                  "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, "
                  "int16_t, uint16_t }");
    // The third/fourth template arguments are implementation details; they must
    // match what Common::select_best_vector_type computes for <T, N>.
    static_assert(
        std::is_same<VectorType_,
                     typename Common::select_best_vector_type<T, N>::type>::value &&
            VectorType_::size() == N,
        "ERROR: leave the third and fourth template parameters with their defaults. They "
        "are implementation details.");

public:
    static constexpr bool is_atomic = true;  // one native vector, not split further
    using VectorType = VectorType_;
    using vector_type = VectorType;
    using storage_type = vector_type;
    using vectorentry_type = typename vector_type::VectorEntryType;
    using value_type = T;
    using mask_type = fixed_size_simd_mask<T, N>;
    using index_type = fixed_size_simd<int, N>;
    /// Number of elements in the SimdArray (equals the native vector's size here).
    static constexpr std::size_t size() { return N; }
    using Mask = mask_type;
    using MaskType = Mask;
    using MaskArgument = const MaskType &;
    using VectorEntryType = vectorentry_type;
    using EntryType = value_type;
    using IndexType = index_type;
    using AsArg = const SimdArray &;
    using reference = Detail::ElementReference<SimdArray>;
    static constexpr std::size_t Size = size();
    static constexpr std::size_t MemoryAlignment = storage_type::MemoryAlignment;

    // zero init
    Vc_INTRINSIC SimdArray() = default;

    // default copy ctor/operator
    Vc_INTRINSIC SimdArray(const SimdArray &) = default;
    Vc_INTRINSIC SimdArray(SimdArray &&) = default;
    Vc_INTRINSIC SimdArray &operator=(const SimdArray &) = default;

    // broadcast: replicate one scalar into all N lanes. The non-const lvalue
    // overload exists so that non-const scalar arguments do not select the
    // template constructor below instead.
    Vc_INTRINSIC SimdArray(const value_type &a) : data(a) {}
    Vc_INTRINSIC SimdArray(value_type &a) : data(a) {}
    Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {}
    // Broadcast from `int` (e.g. literals) when value_type is not int itself.
    template <
        typename U,
        typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
    Vc_INTRINSIC SimdArray(U a)
        : SimdArray(static_cast<value_type>(a))
    {
    }

    // implicit casts
    // ... from an equally wide SimdArray stored in one native vector
    template <class U, class V, class = enable_if<N == V::Size>>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(x)))
    {
    }
    // ... from a SimdArray split into two pieces
    template <class U, class V, class = enable_if<(N > V::Size && N <= 2 * V::Size)>,
              class = U>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(internal_data0(x)),
                                      internal_data(internal_data1(x))))
    {
    }
    // ... from a SimdArray split into four pieces
    template <class U, class V, class = enable_if<(N > 2 * V::Size && N <= 4 * V::Size)>,
              class = U, class = U>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))),
                                      internal_data(internal_data1(internal_data0(x))),
                                      internal_data(internal_data0(internal_data1(x))),
                                      internal_data(internal_data1(internal_data1(x)))))
    {
    }

    // Construct from the Index-th piece of a larger vector (Segment reference).
    template <typename V, std::size_t Pieces, std::size_t Index>
    Vc_INTRINSIC SimdArray(Common::Segment<V, Pieces, Index> &&x)
        : data(simd_cast<vector_type, Index>(x.data))
    {
    }

    // Braced-init-list of exactly N values (checked via Vc_ASSERT).
    Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
        : data(init.begin(), Vc::Unaligned)
    {
        Vc_ASSERT(init.size() == size());
    }

    // implicit conversion from underlying vector_type
    template <
        typename V,
        typename = enable_if<Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value>>
    Vc_INTRINSIC SimdArray(const V &x)
        : data(simd_cast<vector_type>(x))
    {
    }

    // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and
    // T implicitly convertible to U
    template <typename U, typename A,
              typename =
                  enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
                            !std::is_same<A, simd_abi::fixed_size<N>>::value>>
    Vc_INTRINSIC operator Vector<U, A>() const
    {
        return simd_cast<Vector<U, A>>(data);
    }
    // NOTE(review): these are downcasts — fixed_size_simd<T, N> is expected to
    // derive from this specialization; confirm against its definition.
    operator fixed_size_simd<T, N> &()
    {
        return static_cast<fixed_size_simd<T, N> &>(*this);
    }
    operator const fixed_size_simd<T, N> &() const
    {
        return static_cast<const fixed_size_simd<T, N> &>(*this);
    }

#include "gatherinterface.h"
#include "scatterinterface.h"

    // Special-value constructors forward directly to the native vector.
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data() {}
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data(o) {}
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i) : data(i)
    {
    }
    // Produces [Offset, Offset + 1, ..., Offset + N - 1].
    template <std::size_t Offset>
    explicit Vc_INTRINSIC SimdArray(
        Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
        : data(Vc::IndexesFromZero)
    {
        data += value_type(Offset);
    }

    // Zero all lanes / only the lanes selected by the mask.
    Vc_INTRINSIC void setZero() { data.setZero(); }
    Vc_INTRINSIC void setZero(mask_type k) { data.setZero(internal_data(k)); }
    Vc_INTRINSIC void setZeroInverted() { data.setZeroInverted(); }
    Vc_INTRINSIC void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); }

    // Set (masked) lanes to quiet NaN.
    Vc_INTRINSIC void setQnan() { data.setQnan(); }
    Vc_INTRINSIC void setQnan(mask_type m) { data.setQnan(internal_data(m)); }

    // internal: execute specified Operation
    template <typename Op, typename... Args>
    static Vc_INTRINSIC fixed_size_simd<T, N> fromOperation(Op op, Args &&... args)
    {
        fixed_size_simd<T, N> r;
        Common::unpackArgumentsAuto(op, r.data, std::forward<Args>(args)...);
        return r;
    }

    // internal: execute specified Operation without producing a result value
    template <typename Op, typename... Args>
    static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
    {
        Common::unpackArgumentsAuto(op, nullptr, std::forward<Args>(args)...);
    }

    /// Returns a SimdArray with all lanes set to 0.
    static Vc_INTRINSIC fixed_size_simd<T, N> Zero()
    {
        return SimdArray(Vc::Zero);
    }
    /// Returns a SimdArray with all lanes set to 1.
    static Vc_INTRINSIC fixed_size_simd<T, N> One()
    {
        return SimdArray(Vc::One);
    }
    /// Returns a SimdArray with the lanes set to [0, 1, ..., N - 1].
    static Vc_INTRINSIC fixed_size_simd<T, N> IndexesFromZero()
    {
        return SimdArray(Vc::IndexesFromZero);
    }
    /// Returns a SimdArray with random lane values (native vector's RNG).
    static Vc_INTRINSIC fixed_size_simd<T, N> Random()
    {
        return fromOperation(Common::Operations::random());
    }

    // load ctor
    template <class U, class Flags = DefaultLoadTag,
              class = enable_if<std::is_arithmetic<U>::value &&
                                Traits::is_load_store_flag<Flags>::value>>
    explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = {}) : data(mem, f)
    {
    }

    // Load N values; Args carry the pointer and optional load-store flags.
    template <typename... Args> Vc_INTRINSIC void load(Args &&... args)
    {
        data.load(std::forward<Args>(args)...);
    }

    // Store N values; Args carry the pointer and optional load-store flags.
    template <typename... Args> Vc_INTRINSIC void store(Args &&... args) const
    {
        data.store(std::forward<Args>(args)...);
    }

    // Mask of lanes that compare equal to 0.
    Vc_INTRINSIC mask_type operator!() const
    {
        return {private_init, !data};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> operator-() const
    {
        return {private_init, -data};
    }

    /// Returns a copy of itself.
    Vc_INTRINSIC fixed_size_simd<T, N> operator+() const { return *this; }

    Vc_INTRINSIC fixed_size_simd<T, N> operator~() const
    {
        return {private_init, ~data};
    }

    // Shift operators; only participate in overload resolution for integral T.
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator<<(U x) const
    {
        return {private_init, data << x};
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC fixed_size_simd<T, N> &operator<<=(U x)
    {
        data <<= x;
        return *this;
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator>>(U x) const
    {
        return {private_init, data >> x};
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC fixed_size_simd<T, N> &operator>>=(U x)
    {
        data >>= x;
        return *this;
    }

    // Compound-assignment operators for all arithmetic, binary, and shift ops;
    // each forwards to the corresponding operator of the native vector.
#define Vc_BINARY_OPERATOR_(op)                                                          \
    Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs)             \
    {                                                                                    \
        data op## = rhs.data;                                                            \
        return *this;                                                                    \
    }
    Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
    Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
    Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
#undef Vc_BINARY_OPERATOR_

    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
    {
        return {private_init, isnegative(data)};
    }

private:
    // Element access backing the `reference` proxy type declared above.
    friend reference;
    Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
    {
        return o.data[i];
    }
    template <typename U>
    Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
        noexcept(std::declval<value_type &>() = v))
    {
        o.data[i] = v;
    }

public:
    /// Returns a smart (lvalue) reference to the element at index i.
    Vc_INTRINSIC reference operator[](size_t i) noexcept
    {
        static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
        return {*this, int(i)};
    }
    /// Returns a copy of the element at index i.
    Vc_INTRINSIC value_type operator[](size_t i) const noexcept
    {
        return get(*this, int(i));
    }

    /// Returns a write-masked view: assignments through it only touch lanes where k is true.
    Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(const mask_type &k)
    {
        return {*this, k};
    }

    /// Copies the lanes of v selected by k into *this.
    Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k)
    {
        data.assign(v.data, internal_data(k));
    }

    // reductions ////////////////////////////////////////////////////////
    // Each reduction comes in an unmasked and a masked variant, forwarding to
    // the native vector's reduction of the same name.
#define Vc_REDUCTION_FUNCTION_(name_)                                                    \
    Vc_INTRINSIC Vc_PURE value_type name_() const { return data.name_(); }               \
    Vc_INTRINSIC Vc_PURE value_type name_(mask_type mask) const                          \
    {                                                                                    \
        return data.name_(internal_data(mask));                                          \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
    Vc_REDUCTION_FUNCTION_(min);
    Vc_REDUCTION_FUNCTION_(max);
    Vc_REDUCTION_FUNCTION_(product);
    Vc_REDUCTION_FUNCTION_(sum);
#undef Vc_REDUCTION_FUNCTION_
    /// Returns the running (prefix) sum over the lanes.
    Vc_INTRINSIC Vc_PURE fixed_size_simd<T, N> partialSum() const
    {
        return {private_init, data.partialSum()};
    }

    /// Applies f to every lane and returns the results.
    template <typename F> Vc_INTRINSIC fixed_size_simd<T, N> apply(F &&f) const
    {
        return {private_init, data.apply(std::forward<F>(f))};
    }
    // NOTE(review): the mask is passed as fixed-size mask_type here, relying on
    // conversion to the native mask — confirm against vector_type::apply.
    template <typename F> Vc_INTRINSIC fixed_size_simd<T, N> apply(F &&f, const mask_type &k) const
    {
        return {private_init, data.apply(std::forward<F>(f), k)};
    }

    /// Lanes shifted by amount positions; vacated lanes come from the native shifted().
    Vc_INTRINSIC fixed_size_simd<T, N> shifted(int amount) const
    {
        return {private_init, data.shifted(amount)};
    }

    /// Shift with lanes shifted in from shiftIn (converted to the native vector type).
    template <std::size_t NN>
    Vc_INTRINSIC fixed_size_simd<T, N> shifted(int amount, const SimdArray<value_type, NN> &shiftIn)
        const
    {
        return {private_init, data.shifted(amount, simd_cast<VectorType>(shiftIn))};
    }

    /// Lanes rotated by amount positions (wrap-around).
    Vc_INTRINSIC fixed_size_simd<T, N> rotated(int amount) const
    {
        return {private_init, data.rotated(amount)};
    }

    Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC fixed_size_simd<T, N> exponent() const
    {
        return {private_init, exponent(data)};
    }

    /// Interleaves the low/high halves of *this and x (forwards to the native vector).
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveLow(SimdArray x) const
    {
        return {private_init, data.interleaveLow(x.data)};
    }
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveHigh(SimdArray x) const
    {
        return {private_init, data.interleaveHigh(x.data)};
    }

    /// Returns the lanes in reversed order.
    Vc_INTRINSIC fixed_size_simd<T, N> reversed() const
    {
        return {private_init, data.reversed()};
    }

    /// Returns the lanes sorted in ascending order.
    Vc_INTRINSIC fixed_size_simd<T, N> sorted() const
    {
        return {private_init, data.sorted()};
    }

    // Generator constructor: lane i is initialized with gen(i). The decltype
    // default argument restricts this to callables taking a std::size_t.
    template <class G, class = decltype(std::declval<G>()(std::size_t())),
              class = enable_if<!Traits::is_simd_vector<G>::value>>
    Vc_INTRINSIC SimdArray(const G &gen) : data(gen)
    {
    }
    /// Named generator: equivalent to the generator constructor above.
    template <typename G> static Vc_INTRINSIC fixed_size_simd<T, N> generate(const G &gen)
    {
        return {private_init, VectorType::generate(gen)};
    }

    Vc_DEPRECATED("use copysign(x, y) instead")
    Vc_INTRINSIC fixed_size_simd<T, N> copySign(const SimdArray &x) const
    {
        return {private_init, Vc::copysign(data, x.data)};
    }

    // Free functions (defined below) that expose the wrapped native vector.
    friend VectorType &internal_data<>(SimdArray &x);
    friend const VectorType &internal_data<>(const SimdArray &x);

    // \internal Wraps a native vector without any conversion.
    Vc_INTRINSIC SimdArray(private_init_t, VectorType &&x) : data(std::move(x)) {}

    // Aligned operator new/delete so heap-allocated objects keep SIMD alignment.
    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type));

private:
    // The alignas attribute attached to the class declaration above is ignored by ICC
    // 17.0.0 (at least). So just move the alignas attribute down here where it works for
    // all compilers.
    alignas(static_cast<std::size_t>(
        Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(VectorType_) /
                                 VectorType_::size()>::value)) storage_type data;
};
// Out-of-class definitions of the static constexpr members (required for
// ODR-use before C++17 inline variables).
template <typename T, std::size_t N, typename VectorType> constexpr std::size_t SimdArray<T, N, VectorType, N>::Size;
template <typename T, std::size_t N, typename VectorType>
constexpr std::size_t SimdArray<T, N, VectorType, N>::MemoryAlignment;
// \internal Mutable access to the native vector wrapped by an atomic SimdArray.
// NOTE(review): Vc_INTRINSIC is omitted for MSVC — presumably a compiler
// workaround; confirm against the Vc_MSVC definition.
template <typename T, std::size_t N, typename VectorType>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
VectorType &internal_data(SimdArray<T, N, VectorType, N> &x)
{
    return x.data;
}
// \internal Read-only access to the native vector wrapped by an atomic SimdArray.
template <typename T, std::size_t N, typename VectorType>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const VectorType &internal_data(const SimdArray<T, N, VectorType, N> &x)
{
    return x.data;
}
532
533// unwrap {{{2
534template <class T> Vc_INTRINSIC T unwrap(const T &x) { return x; }
535
536template <class T, size_t N, class V>
537Vc_INTRINSIC V unwrap(const SimdArray<T, N, V, N> &x)
538{
539 return internal_data(x);
540}
541
542template <class T, size_t Pieces, size_t Index>
543Vc_INTRINSIC auto unwrap(const Common::Segment<T, Pieces, Index> &x)
544 -> decltype(x.to_fixed_size())
545{
546 return unwrap(x.to_fixed_size());
547}
548
549// gatherImplementation {{{2
// \internal Unmasked gather: delegates to the native vector's gather after
// unwrap() has reduced the index object to a form the native gather accepts.
template <typename T, std::size_t N, typename VectorType>
template <class MT, class IT, int Scale>
Vc_INTRINSIC void SimdArray<T, N, VectorType, N>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args)
{
    data.gather(Common::make_gather<Scale>(args.address, unwrap(args.indexes)));
}
// \internal Masked gather: only lanes selected by mask are loaded; the rest
// keep their previous values (behavior of the native masked gather).
template <typename T, std::size_t N, typename VectorType>
template <class MT, class IT, int Scale>
Vc_INTRINSIC void SimdArray<T, N, VectorType, N>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args, MaskArgument mask)
{
    data.gather(Common::make_gather<Scale>(args.address, unwrap(args.indexes)),
                mask);
}
565
566// scatterImplementation {{{2
// \internal Unmasked scatter: writes every lane to mem at the given indexes.
template <typename T, std::size_t N, typename VectorType>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
                                                                  IT &&indexes) const
{
    data.scatter(mem, unwrap(std::forward<IT>(indexes)));
}
// \internal Masked scatter: only lanes selected by mask are written to memory.
template <typename T, std::size_t N, typename VectorType>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
                                                                  IT &&indexes,
                                                                  MaskArgument mask) const
{
    data.scatter(mem, unwrap(std::forward<IT>(indexes)), mask);
}
582
583// generic SimdArray {{{1
616template <typename T, size_t N, typename V, size_t Wt> class SimdArray
617{
618 static_assert(std::is_same<T, double>::value ||
619 std::is_same<T, float>::value ||
620 std::is_same<T, int32_t>::value ||
621 std::is_same<T, uint32_t>::value ||
622 std::is_same<T, int16_t>::value ||
623 std::is_same<T, uint16_t>::value, "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }");
624 static_assert(
625 std::is_same<V, typename Common::select_best_vector_type<T, N>::type>::value &&
626 V::size() == Wt,
627 "ERROR: leave the third and fourth template parameters with their defaults. They "
628 "are implementation details.");
629 static_assert(
630 // either the EntryType and VectorEntryType of the main V are equal
631 std::is_same<typename V::EntryType, typename V::VectorEntryType>::value ||
632 // or N is a multiple of V::size()
633 (N % V::size() == 0),
634 "SimdArray<(un)signed short, N> on MIC only works correctly for N = k * "
635 "MIC::(u)short_v::size(), i.e. k * 16.");
636
637 using my_traits = SimdArrayTraits<T, N>;
638 static constexpr std::size_t N0 = my_traits::N0;
639 static constexpr std::size_t N1 = my_traits::N1;
640 using Split = Common::Split<N0>;
641 template <typename U, std::size_t K> using CArray = U[K];
642
643public:
644 static constexpr bool is_atomic = false;
645 using storage_type0 = typename my_traits::storage_type0;
646 using storage_type1 = typename my_traits::storage_type1;
647 static_assert(storage_type0::size() == N0, "");
648
652 using vector_type = V;
653 using vectorentry_type = typename storage_type0::vectorentry_type;
654 typedef vectorentry_type alias_type Vc_MAY_ALIAS;
655
657 using value_type = T;
658
661
664
675 static constexpr std::size_t size() { return N; }
676
680 using MaskType = Mask;
681 using MaskArgument = const MaskType &;
682 using VectorEntryType = vectorentry_type;
687 using AsArg = const SimdArray &;
688
689 using reference = Detail::ElementReference<SimdArray>;
690
692 static constexpr std::size_t MemoryAlignment =
693 storage_type0::MemoryAlignment > storage_type1::MemoryAlignment
694 ? storage_type0::MemoryAlignment
695 : storage_type1::MemoryAlignment;
696
699
701 static Vc_INTRINSIC fixed_size_simd<T, N> Zero()
702 {
703 return SimdArray(Vc::Zero);
704 }
705
707 static Vc_INTRINSIC fixed_size_simd<T, N> One()
708 {
709 return SimdArray(Vc::One);
710 }
711
714 {
716 }
717
719 static Vc_INTRINSIC fixed_size_simd<T, N> Random()
720 {
721 return fromOperation(Common::Operations::random());
722 }
723
724 template <class G, class = decltype(std::declval<G>()(std::size_t())),
725 class = enable_if<!Traits::is_simd_vector<G>::value>>
726 Vc_INTRINSIC SimdArray(const G &gen)
727 : data0(gen), data1([&](std::size_t i) { return gen(i + storage_type0::size()); })
728 {
729 }
730
732 template <typename G> static Vc_INTRINSIC fixed_size_simd<T, N> generate(const G &gen) // {{{2
733 {
734 auto tmp = storage_type0::generate(gen); // GCC bug: the order of evaluation in
735 // an initializer list is well-defined
736 // (front to back), but GCC 4.8 doesn't
737 // implement this correctly. Therefore
738 // we enforce correct order.
739 return {std::move(tmp),
740 storage_type1::generate([&](std::size_t i) { return gen(i + N0); })};
741 }
743
746
748 SimdArray() = default;
750
753
755 Vc_INTRINSIC SimdArray(value_type a) : data0(a), data1(a) {}
756 template <
757 typename U,
758 typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
759 SimdArray(U a)
760 : SimdArray(static_cast<value_type>(a))
761 {
762 }
764
765 // default copy ctor/operator
766 SimdArray(const SimdArray &) = default;
767 SimdArray(SimdArray &&) = default;
768 SimdArray &operator=(const SimdArray &) = default;
769
770 // load ctor
771 template <typename U, typename Flags = DefaultLoadTag,
772 typename = enable_if<std::is_arithmetic<U>::value &&
773 Traits::is_load_store_flag<Flags>::value>>
774 explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = {})
775 : data0(mem, f), data1(mem + storage_type0::size(), f)
776 {
777 }
778
779// MSVC does overload resolution differently and takes the const U *mem overload (I hope)
780#ifndef Vc_MSVC
787 template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
788 typename = enable_if<std::is_arithmetic<U>::value &&
789 Traits::is_load_store_flag<Flags>::value>>
790 explicit Vc_INTRINSIC SimdArray(CArray<U, Extent> &mem, Flags f = {})
791 : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
792 {
793 }
797 template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
798 typename = enable_if<std::is_arithmetic<U>::value &&
799 Traits::is_load_store_flag<Flags>::value>>
800 explicit Vc_INTRINSIC SimdArray(const CArray<U, Extent> &mem, Flags f = {})
801 : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
802 {
803 }
804#endif
805
806 // initializer list
807 Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
808 : data0(init.begin(), Vc::Unaligned)
809 , data1(init.begin() + storage_type0::size(), Vc::Unaligned)
810 {
811 Vc_ASSERT(init.size() == size());
812 }
813
814#include "gatherinterface.h"
815#include "scatterinterface.h"
816
817 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data0(), data1() {}
818 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data0(o), data1(o) {}
819 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i)
820 : data0(i)
821 , data1(Common::AddOffset<VectorSpecialInitializerIndexesFromZero,
822 storage_type0::size()>())
823 {
824 }
825 template <size_t Offset>
826 explicit Vc_INTRINSIC SimdArray(
827 Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset> i)
828 : data0(i)
829 , data1(Common::AddOffset<VectorSpecialInitializerIndexesFromZero,
830 storage_type0::size() + Offset>())
831 {
832 }
833
834 // explicit casts
835 template <class W, class = enable_if<
836 (Traits::is_simd_vector<W>::value &&
837 Traits::simd_vector_size<W>::value == N &&
838 !(std::is_convertible<Traits::entry_type_of<W>, T>::value &&
839 Traits::isSimdArray<W>::value))>>
840 Vc_INTRINSIC explicit SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
841 {
842 }
843
844 // implicit casts
845 template <class W, class = enable_if<
846 (Traits::isSimdArray<W>::value &&
847 Traits::simd_vector_size<W>::value == N &&
848 std::is_convertible<Traits::entry_type_of<W>, T>::value)>,
849 class = W>
850 Vc_INTRINSIC SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
851 {
852 }
853
854 template <class W, std::size_t Pieces, std::size_t Index>
855 Vc_INTRINSIC SimdArray(Common::Segment<W, Pieces, Index> &&x)
856 : data0(Common::Segment<W, 2 * Pieces, 2 * Index>{x.data})
857 , data1(Common::Segment<W, 2 * Pieces, 2 * Index + 1>{x.data})
858 {
859 }
860
861 // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and
862 // T implicitly convertible to U
863 template <typename U, typename A,
864 typename =
865 enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
866 !std::is_same<A, simd_abi::fixed_size<N>>::value>>
867 operator Vector<U, A>() const
868 {
869 auto r = simd_cast<Vector<U, A>>(data0, data1);
870 return r;
871 }
872 Vc_INTRINSIC operator fixed_size_simd<T, N> &()
873 {
874 return static_cast<fixed_size_simd<T, N> &>(*this);
875 }
876 Vc_INTRINSIC operator const fixed_size_simd<T, N> &() const
877 {
878 return static_cast<const fixed_size_simd<T, N> &>(*this);
879 }
880
882
883 Vc_INTRINSIC void setZero()
884 {
885 data0.setZero();
886 data1.setZero();
887 }
888 Vc_INTRINSIC void setZero(const mask_type &k)
889 {
890 data0.setZero(Split::lo(k));
891 data1.setZero(Split::hi(k));
892 }
893 Vc_INTRINSIC void setZeroInverted()
894 {
895 data0.setZeroInverted();
896 data1.setZeroInverted();
897 }
898 Vc_INTRINSIC void setZeroInverted(const mask_type &k)
899 {
900 data0.setZeroInverted(Split::lo(k));
901 data1.setZeroInverted(Split::hi(k));
902 }
903
904
905 Vc_INTRINSIC void setQnan() {
906 data0.setQnan();
907 data1.setQnan();
908 }
909 Vc_INTRINSIC void setQnan(const mask_type &m) {
910 data0.setQnan(Split::lo(m));
911 data1.setQnan(Split::hi(m));
912 }
913
915 template <typename Op, typename... Args>
916 static Vc_INTRINSIC fixed_size_simd<T, N> fromOperation(Op op, Args &&... args)
917 {
918 fixed_size_simd<T, N> r = {
919 storage_type0::fromOperation(op, Split::lo(args)...), // no forward here - it
920 // could move and thus
921 // break the next line
922 storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
923 return r;
924 }
925
927 template <typename Op, typename... Args>
928 static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
929 {
930 storage_type0::callOperation(op, Split::lo(args)...);
931 storage_type1::callOperation(op, Split::hi(std::forward<Args>(args))...);
932 }
933
934
935 template <typename U, typename... Args> Vc_INTRINSIC void load(const U *mem, Args &&... args)
936 {
937 data0.load(mem, Split::lo(args)...); // no forward here - it could move and thus
938 // break the next line
939 data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
940 }
941
942 template <typename U, typename... Args> Vc_INTRINSIC void store(U *mem, Args &&... args) const
943 {
944 data0.store(mem, Split::lo(args)...); // no forward here - it could move and thus
945 // break the next line
946 data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
947 }
948
949 Vc_INTRINSIC mask_type operator!() const
950 {
951 return {!data0, !data1};
952 }
953
954 Vc_INTRINSIC fixed_size_simd<T, N> operator-() const
955 {
956 return {-data0, -data1};
957 }
958
960 Vc_INTRINSIC fixed_size_simd<T, N> operator+() const { return *this; }
961
962 Vc_INTRINSIC fixed_size_simd<T, N> operator~() const
963 {
964 return {~data0, ~data1};
965 }
966
967 // left/right shift operators {{{2
968 template <typename U,
969 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
970 Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator<<(U x) const
971 {
972 return {data0 << x, data1 << x};
973 }
974 template <typename U,
975 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
976 Vc_INTRINSIC fixed_size_simd<T, N> &operator<<=(U x)
977 {
978 data0 <<= x;
979 data1 <<= x;
980 return *this;
981 }
982 template <typename U,
983 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
984 Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator>>(U x) const
985 {
986 return {data0 >> x, data1 >> x};
987 }
988 template <typename U,
989 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
990 Vc_INTRINSIC fixed_size_simd<T, N> &operator>>=(U x)
991 {
992 data0 >>= x;
993 data1 >>= x;
994 return *this;
995 }
996
997 // binary operators {{{2
998#define Vc_BINARY_OPERATOR_(op) \
999 Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs) \
1000 { \
1001 data0 op## = rhs.data0; \
1002 data1 op## = rhs.data1; \
1003 return *this; \
1004 }
1005 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
1006 Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
1007 Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
1008#undef Vc_BINARY_OPERATOR_
1009
1010 // operator[] {{{2
1013
1014private:
1015 friend reference;
1016 Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
1017 {
1018 return reinterpret_cast<const alias_type *>(&o)[i];
1019 }
1020 template <typename U>
1021 Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
1022 noexcept(std::declval<value_type &>() = v))
1023 {
1024 reinterpret_cast<alias_type *>(&o)[i] = v;
1025 }
1026
1027public:
1029
1035 Vc_INTRINSIC reference operator[](size_t i) noexcept
1036 {
1037 static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
1038 return {*this, int(i)};
1039 }
1040
1042 Vc_INTRINSIC value_type operator[](size_t index) const noexcept
1043 {
1044 return get(*this, int(index));
1045 }
1047
1048 // operator(){{{2
1050 Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
1051 const mask_type &mask)
1052 {
1053 return {*this, mask};
1054 }
1055
1057 Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k) //{{{2
1058 {
1059 data0.assign(v.data0, internal_data0(k));
1060 data1.assign(v.data1, internal_data1(k));
1061 }
1062
1063 // reductions {{{2
1064#define Vc_REDUCTION_FUNCTION_(name_, binary_fun_, scalar_fun_) \
1065private: \
1066 template <typename ForSfinae = void> \
1067 Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
1068 storage_type0::Size == storage_type1::Size, \
1069 value_type> name_##_impl() const \
1070 { \
1071 return binary_fun_(data0, data1).name_(); \
1072 } \
1073 \
1074 template <typename ForSfinae = void> \
1075 Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
1076 storage_type0::Size != storage_type1::Size, \
1077 value_type> name_##_impl() const \
1078 { \
1079 return scalar_fun_(data0.name_(), data1.name_()); \
1080 } \
1081 \
1082public: \
1083 \
1084 Vc_INTRINSIC value_type name_() const { return name_##_impl(); } \
1085 \
1086 Vc_INTRINSIC value_type name_(const mask_type &mask) const \
1087 { \
1088 if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) { \
1089 return data1.name_(Split::hi(mask)); \
1090 } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) { \
1091 return data0.name_(Split::lo(mask)); \
1092 } else { \
1093 return scalar_fun_(data0.name_(Split::lo(mask)), \
1094 data1.name_(Split::hi(mask))); \
1095 } \
1096 } \
1097 Vc_NOTHING_EXPECTING_SEMICOLON
1098 Vc_REDUCTION_FUNCTION_(min, Vc::min, std::min);
1099 Vc_REDUCTION_FUNCTION_(max, Vc::max, std::max);
1100 Vc_REDUCTION_FUNCTION_(product, internal::product_helper_, internal::product_helper_);
1101 Vc_REDUCTION_FUNCTION_(sum, internal::sum_helper_, internal::sum_helper_);
1102#undef Vc_REDUCTION_FUNCTION_
// Prefix sum: computes the running sum within each half, then propagates the
// last partial sum of the low half into the first element of the high half
// before its own prefix sum is taken.
1104 Vc_INTRINSIC Vc_PURE fixed_size_simd<T, N> partialSum() const //{{{2
1105 {
1106 auto ps0 = data0.partialSum();
1107 auto tmp = data1;
1108 tmp[0] += ps0[data0.size() - 1];
1109 return {std::move(ps0), tmp.partialSum()};
1110 }
1111
1112 // apply {{{2
// Applies f element-wise to both halves.
1114 template <typename F> inline fixed_size_simd<T, N> apply(F &&f) const
1115 {
1116 return {data0.apply(f), data1.apply(f)};
1117 }
// Masked apply: f is only evaluated where k is true; the mask is split to
// match the two halves.
1119 template <typename F>
1120 inline fixed_size_simd<T, N> apply(F &&f, const mask_type &k) const
1121 {
1122 return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))};
1123 }
1124
1124
1125 // shifted {{{2
// Shifts all elements by `amount` positions (positive = towards lower
// indices), filling vacated slots with zero. The work is split across the two
// halves; elements crossing the data0/data1 boundary are carried over via the
// two-argument shifted() of the storage types.
1127 inline fixed_size_simd<T, N> shifted(int amount) const
1128 {
1129 constexpr int SSize = Size;
1130 constexpr int SSize0 = storage_type0::Size;
1131 constexpr int SSize1 = storage_type1::Size;
1132 if (amount == 0) {
1133 return *this;
1134 }
1135 if (amount < 0) {
1136 if (amount > -SSize0) {
1137 return {data0.shifted(amount), data1.shifted(amount, data0)};
1138 }
1139 if (amount == -SSize0) {
1140 return {storage_type0(0), simd_cast<storage_type1>(data0)};
1141 }
1142 if (amount < -SSize0) {
1143 return {storage_type0(0), simd_cast<storage_type1>(data0.shifted(
1144 amount + SSize0))};
1145 }
1146 return Zero();
1147 } else {
1148 if (amount >= SSize) {
1149 return Zero();
1150 } else if (amount >= SSize0) {
1151 return {
1152 simd_cast<storage_type0>(data1).shifted(amount - SSize0),
1153 storage_type1(0)};
1154 } else if (amount >= SSize1) {
1155 return {data0.shifted(amount, data1), storage_type1(0)};
1156 } else {
1157 return {data0.shifted(amount, data1), data1.shifted(amount)};
1158 }
1159 }
1160 }
1161
1161
// Generic (non-bisectable) shift-with-carry: enabled when the two storage
// halves differ in type or size, so the fast half-wise path below cannot be
// used. Falls back to a per-element generator: out-of-range indices are
// served from shiftIn, anything further is zero.
// NOTE(review): the return-type line of this declaration (doc line 1166) is
// not visible in this extract.
1162 template <std::size_t NN>
1163 inline enable_if<
1164 !(std::is_same<storage_type0, storage_type1>::value && // not bisectable
1165 N == NN),
1167 shifted(int amount, const SimdArray<value_type, NN> &shiftIn) const
1168 {
1169 constexpr int SSize = Size;
1170 if (amount < 0) {
1171 return fixed_size_simd<T, N>([&](int i) -> value_type {
1172 i += amount;
1173 if (i >= 0) {
1174 return operator[](i);
1175 } else if (i >= -SSize) {
1176 return shiftIn[i + SSize];
1177 }
1178 return 0;
1179 });
1180 }
1181 return fixed_size_simd<T, N>([&](int i) -> value_type {
1182 i += amount;
1183 if (i < SSize) {
1184 return operator[](i);
1185 } else if (i < 2 * SSize) {
1186 return shiftIn[i - SSize];
1187 }
1188 return 0;
1189 });
1190 }
1191
1192private:
1193 // workaround for MSVC not understanding the simpler and shorter expression of the boolean
1194 // expression directly in the enable_if below
// True when both halves share one storage type and NN == N, i.e. the shift
// can be computed half-by-half (see the overload below).
1195 template <std::size_t NN> struct bisectable_shift
1196 : public std::integral_constant<bool,
1197 std::is_same<storage_type0, storage_type1>::value && // bisectable
1198 N == NN>
1199 {
1200 };
1201
1201
1202public:
// Bisectable shift-with-carry: both halves have identical storage type, so
// every case (|amount| up to 2*Size) reduces to two half-wise shifted()
// calls with the appropriate carry operand taken from this or shiftIn.
1203 template <std::size_t NN>
1204 inline fixed_size_simd<T, N> shifted(
1205 enable_if<bisectable_shift<NN>::value, int> amount,
1206 const SimdArray<value_type, NN> &shiftIn) const
1207 {
1208 constexpr int SSize = Size;
1209 if (amount < 0) {
1210 if (amount > -static_cast<int>(storage_type0::Size)) {
1211 return {data0.shifted(amount, internal_data1(shiftIn)),
1212 data1.shifted(amount, data0)};
1213 }
1214 if (amount == -static_cast<int>(storage_type0::Size)) {
1215 return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
1216 }
1217 if (amount > -SSize) {
1218 return {
1219 internal_data1(shiftIn)
1220 .shifted(amount + static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
1221 data0.shifted(amount + static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
1222 }
1223 if (amount == -SSize) {
1224 return shiftIn;
1225 }
1226 if (amount > -2 * SSize) {
1227 return shiftIn.shifted(amount + SSize);
1228 }
1229 }
1230 if (amount == 0) {
1231 return *this;
1232 }
1233 if (amount < static_cast<int>(storage_type0::Size)) {
1234 return {data0.shifted(amount, data1),
1235 data1.shifted(amount, internal_data0(shiftIn))};
1236 }
1237 if (amount == static_cast<int>(storage_type0::Size)) {
1238 return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
1239 }
1240 if (amount < SSize) {
1241 return {data1.shifted(amount - static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
1242 internal_data0(shiftIn)
1243 .shifted(amount - static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
1244 }
1245 if (amount == SSize) {
1246 return shiftIn;
1247 }
1248 if (amount < 2 * SSize) {
1249 return shiftIn.shifted(amount - SSize);
1250 }
// |amount| >= 2*Size: nothing of this or shiftIn remains.
1251 return Zero();
1252 }
1253
1253
1254 // rotated {{{2
// Rotates all elements by `amount` positions (elements shifted out on one end
// re-enter on the other). amount is normalized into [0, size()).
1256 Vc_INTRINSIC fixed_size_simd<T, N> rotated(int amount) const
1257 {
1258 amount %= int(size());
1259 if (amount == 0) {
1260 return *this;
1261 } else if (amount < 0) {
1262 amount += size();
1263 }
1264
1265#ifdef Vc_MSVC
1266 // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use store
1267 // ->
1268 // load to implement the function instead.
// NOTE(review): the declaration of `r` (doc line 1273) is not visible in
// this extract; the store/load below spells the rotation out in memory.
1269 alignas(MemoryAlignment) T tmp[N + data0.size()];
1270 data0.store(&tmp[0], Vc::Aligned);
1271 data1.store(&tmp[data0.size()], Vc::Aligned);
1272 data0.store(&tmp[N], Vc::Unaligned);
1274 r.data0.load(&tmp[amount], Vc::Unaligned);
1275 r.data1.load(&tmp[(amount + data0.size()) % size()], Vc::Unaligned);
1276 return r;
1277#else
// Cross-converted copies of each half, used as carry operands for the
// two-argument shifted() calls below.
1278 auto &&d0cvtd = simd_cast<storage_type1>(data0);
1279 auto &&d1cvtd = simd_cast<storage_type0>(data1);
1280 constexpr int size0 = storage_type0::size();
1281 constexpr int size1 = storage_type1::size();
1282
1283 if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
1284 return {std::move(d1cvtd), std::move(d0cvtd)};
1285 } else if (amount < size1) {
1286 return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
1287 } else if (amount == size1) {
1288 return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
1289 } else if (int(size()) - amount < size1) {
1290 return {data0.shifted(amount - int(size()), d1cvtd.shifted(size1 - size0)),
1291 data1.shifted(amount - int(size()), data0.shifted(size0 - size1))};
1292 } else if (int(size()) - amount == size1) {
1293 return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
1294 simd_cast<storage_type1>(data0.shifted(size0 - size1))};
1295 } else if (amount <= size0) {
1296 return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1297 simd_cast<storage_type1>(data0.shifted(amount - size1))};
1298 } else {
1299 return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1300 simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
1301 }
// Unreachable: all amount values in [0, size()) are handled above.
1302 return *this;
1303#endif
1304 }
1305
1305
1306 // interleaveLow/-High {{{2
// Interleaves the lower halves of this and x:
// result = {(*this)[0], x[0], (*this)[1], x[1], ...} for the first N elements.
1308 Vc_INTRINSIC fixed_size_simd<T, N> interleaveLow(const SimdArray &x) const
1309 {
1310 // return data0[0], x.data0[0], data0[1], x.data0[1], ...
1311 return {data0.interleaveLow(x.data0),
1312 simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
1313 }
// Interleaves the upper halves; dispatches on whether the two storage halves
// have equal Size (true_type overload) or not (false_type overload).
1315 Vc_INTRINSIC fixed_size_simd<T, N> interleaveHigh(const SimdArray &x) const
1316 {
1317 return interleaveHighImpl(
1318 x,
1319 std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
1320 }
1321
1322private:
// Equal-sized halves: the high interleave comes entirely from data1.
1324 Vc_INTRINSIC fixed_size_simd<T, N> interleaveHighImpl(const SimdArray &x, std::true_type) const
1325 {
1326 return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
1327 }
// Unequal-sized halves: part of data0's high interleave still belongs to the
// result and must be shifted together with data1's contribution.
1329 inline fixed_size_simd<T, N> interleaveHighImpl(const SimdArray &x, std::false_type) const
1330 {
1331 return {data0.interleaveHigh(x.data0)
1332 .shifted(storage_type1::Size,
1333 simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
1334 data1.interleaveHigh(x.data1)};
1335 }
1336
1336
1337public:
// Returns the elements in reverse order. For equal-sized halves the halves
// are swapped (with cross-conversion) and each reversed individually.
1339 inline fixed_size_simd<T, N> reversed() const //{{{2
1340 {
1341 if (std::is_same<storage_type0, storage_type1>::value) {
1342 return {simd_cast<storage_type0>(data1).reversed(),
1343 simd_cast<storage_type1>(data0).reversed()};
1344 } else {
1345#ifdef Vc_MSVC
1346 // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use
1347 // store
1348 // -> load to implement the function instead.
1349 alignas(MemoryAlignment) T tmp[N];
1350 data1.reversed().store(&tmp[0], Vc::Aligned);
1351 data0.reversed().store(&tmp[data1.size()], Vc::Unaligned);
1352 return fixed_size_simd<T, N>{&tmp[0], Vc::Aligned};
1353#else
1354 return {data0.shifted(storage_type1::Size, data1).reversed(),
1355 simd_cast<storage_type1>(data0.reversed().shifted(
1356 storage_type0::Size - storage_type1::Size))};
1357#endif
1358 }
1359 }
// Returns a sorted copy; dispatches on whether the two halves are of equal
// Size (bitonic-style merge) or not (pad-and-sort fallback).
1361 inline fixed_size_simd<T, N> sorted() const //{{{2
1362 {
1363 return sortedImpl(
1364 std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
1365 }
1366
// Equal halves: sort both, reverse one, then min/max-merge and sort each
// result (a bitonic merge step).
1368 Vc_INTRINSIC fixed_size_simd<T, N> sortedImpl(std::true_type) const
1369 {
1370#ifdef Vc_DEBUG_SORTED
1371 std::cerr << "-- " << data0 << data1 << '\n';
1372#endif
1373 const auto a = data0.sorted();
1374 const auto b = data1.sorted().reversed();
1375 const auto lo = Vc::min(a, b);
1376 const auto hi = Vc::max(a, b);
1377 return {lo.sorted(), hi.sorted()};
1378 }
1379
// Unequal halves: widen to the next power-of-two size, pad the extra lanes
// with the maximum representable value (or +inf) so they sort to the end,
// sort, and narrow back.
1381 Vc_INTRINSIC fixed_size_simd<T, N> sortedImpl(std::false_type) const
1382 {
1383 using SortableArray =
1384 fixed_size_simd<value_type, Common::NextPowerOfTwo<size()>::value>;
1385 auto sortable = simd_cast<SortableArray>(*this);
1386 for (std::size_t i = Size; i < SortableArray::Size; ++i) {
1387 using limits = std::numeric_limits<value_type>;
1388 if (limits::has_infinity) {
1389 sortable[i] = limits::infinity();
1390 } else {
1391 sortable[i] = std::numeric_limits<value_type>::max();
1392 }
1393 }
1394 return simd_cast<fixed_size_simd<T, N>>(sortable.sorted());
1395
1396 /* The following implementation appears to be less efficient. But this may need further
1397 * work.
1398 const auto a = data0.sorted();
1399 const auto b = data1.sorted();
1400#ifdef Vc_DEBUG_SORTED
1401 std::cerr << "== " << a << b << '\n';
1402#endif
1403 auto aIt = Vc::begin(a);
1404 auto bIt = Vc::begin(b);
1405 const auto aEnd = Vc::end(a);
1406 const auto bEnd = Vc::end(b);
1407 return SimdArray::generate([&](std::size_t) {
1408 if (aIt == aEnd) {
1409 return *(bIt++);
1410 }
1411 if (bIt == bEnd) {
1412 return *(aIt++);
1413 }
1414 if (*aIt < *bIt) {
1415 return *(aIt++);
1416 } else {
1417 return *(bIt++);
1418 }
1419 });
1420 */
1421 }
1422
1422
1425
1428 static constexpr std::size_t Size = size();
1429
// Deprecated compatibility API: forwards to the free functions of the same
// purpose, applied to both halves.
1431 Vc_DEPRECATED("use exponent(x) instead")
1432 Vc_INTRINSIC fixed_size_simd<T, N> exponent() const
1433 {
1434 return {exponent(data0), exponent(data1)};
1435 }
1436
1438 Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
1439 {
1440 return {isnegative(data0), isnegative(data1)};
1441 }
1442
1444 Vc_DEPRECATED("use copysign(x, y) instead")
1445 Vc_INTRINSIC fixed_size_simd<T, N> copySign(const SimdArray &x) const
1446 {
1447 return {Vc::copysign(data0, x.data0),
1448 Vc::copysign(data1, x.data1)};
1449 }
1451
1452 // internal_data0/1 {{{2
// Grant the free accessor functions (defined later in this file) direct
// access to the private halves.
1453 friend storage_type0 &internal_data0<>(SimdArray &x);
1454 friend storage_type1 &internal_data1<>(SimdArray &x);
1455 friend const storage_type0 &internal_data0<>(const SimdArray &x);
1456 friend const storage_type1 &internal_data1<>(const SimdArray &x);
1457
// Constructs a SimdArray directly from its two halves (by move).
1459 Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y) //{{{2
1460 : data0(std::move(x)), data1(std::move(y))
1461 {
1462 }
1463
// Aligned new/delete so heap-allocated SimdArrays respect the SIMD alignment.
1464 Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type0));
1465
1466private: //{{{2
1467 // The alignas attribute attached to the class declaration above is ignored by ICC
1468 // 17.0.0 (at least). So just move the alignas attribute down here where it works for
1469 // all compilers.
1470 alignas(static_cast<std::size_t>(
1471 Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(V) /
1472 V::size()>::value)) storage_type0 data0;
1473 storage_type1 data1;
1474};
1475#undef Vc_CURRENT_CLASS_NAME
// Out-of-class definitions for the static constexpr members (required for
// ODR-use before C++17 inline variables).
1476template <typename T, std::size_t N, typename V, std::size_t M>
1477constexpr std::size_t SimdArray<T, N, V, M>::Size;
1478template <typename T, std::size_t N, typename V, std::size_t M>
1479constexpr std::size_t SimdArray<T, N, V, M>::MemoryAlignment;
1480
1481// gatherImplementation {{{2
// Unmasked gather: the index vector is split and each half gathers from the
// same base address.
// NOTE(review): the function signature line (doc line 1484) is not visible in
// this extract.
1482template <typename T, std::size_t N, typename VectorType, std::size_t M>
1483template <class MT, class IT, int Scale>
1485 const Common::GatherArguments<MT, IT, Scale> &args)
1486{
1487 data0.gather(Common::make_gather<Scale>(
1488 args.address, Split::lo(Common::Operations::gather(), args.indexes)));
1489 data1.gather(Common::make_gather<Scale>(
1490 args.address, Split::hi(Common::Operations::gather(), args.indexes)));
1491}
// Masked gather: indexes and mask are split consistently across the halves.
1492template <typename T, std::size_t N, typename VectorType, std::size_t M>
1493template <class MT, class IT, int Scale>
1494inline void SimdArray<T, N, VectorType, M>::gatherImplementation(
1495 const Common::GatherArguments<MT, IT, Scale> &args, MaskArgument mask)
1496{
1497 data0.gather(Common::make_gather<Scale>(
1498 args.address, Split::lo(Common::Operations::gather(), args.indexes)),
1499 Split::lo(mask));
1500 data1.gather(Common::make_gather<Scale>(
1501 args.address, Split::hi(Common::Operations::gather(), args.indexes)),
1502 Split::hi(mask));
1503}
1504
1504
1505// scatterImplementation {{{2
// Unmasked scatter: each half writes its elements to mem at its part of the
// (split) index vector.
1506template <typename T, std::size_t N, typename VectorType, std::size_t M>
1507template <typename MT, typename IT>
1508inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
1509 IT &&indexes) const
1510{
1511 data0.scatter(mem, Split::lo(Common::Operations::gather(),
1512 indexes)); // don't forward indexes - it could move and
1513 // thus break the next line
1514 data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
1515}
// Masked scatter: only lanes with a true mask bit are written.
1516template <typename T, std::size_t N, typename VectorType, std::size_t M>
1517template <typename MT, typename IT>
1518inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
1519 IT &&indexes, MaskArgument mask) const
1520{
1521 data0.scatter(mem, Split::lo(Common::Operations::gather(), indexes),
1522 Split::lo(mask)); // don't forward indexes - it could move and
1523 // thus break the next line
1524 data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
1525 Split::hi(mask));
1526}
1527
1527
1528// internal_data0/1 (SimdArray) {{{1
// Accessors for the two private storage halves of a SimdArray (befriended by
// the class above). Vc_INTRINSIC is omitted on MSVC, where the attribute on
// these templates causes problems.
1530template <typename T, std::size_t N, typename V, std::size_t M>
1531#ifndef Vc_MSVC
1532Vc_INTRINSIC
1533#endif
1534typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
1535 SimdArray<T, N, V, M> &x)
1536{
1537 return x.data0;
1538}
1540template <typename T, std::size_t N, typename V, std::size_t M>
1541#ifndef Vc_MSVC
1542Vc_INTRINSIC
1543#endif
1544typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
1545 SimdArray<T, N, V, M> &x)
1546{
1547 return x.data1;
1548}
1550template <typename T, std::size_t N, typename V, std::size_t M>
1551#ifndef Vc_MSVC
1552Vc_INTRINSIC
1553#endif
1554const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
1555 const SimdArray<T, N, V, M> &x)
1556{
1557 return x.data0;
1558}
1560template <typename T, std::size_t N, typename V, std::size_t M>
1561#ifndef Vc_MSVC
1562Vc_INTRINSIC
1563#endif
1564const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
1565 const SimdArray<T, N, V, M> &x)
1566{
1567 return x.data1;
1568}
1569
1569
1570// MSVC workaround for SimdArray(storage_type0, storage_type1) ctor{{{1
1571// MSVC sometimes stores x to data1. By first broadcasting 0 and then assigning y
1572// in the body the bug is supressed.
// Explicit specialization for the double/8 case on SSE-only MSVC builds.
1573#if defined Vc_MSVC && defined Vc_IMPL_SSE && !defined Vc_IMPL_AVX
1574template <>
1575Vc_INTRINSIC SimdArray<double, 8>::SimdArray(fixed_size_simd<double, 4> &&x,
1576 fixed_size_simd<double, 4> &&y)
1577 : data0(x), data1(0)
1578{
1579 data1 = y;
1580}
1581#endif
1582
1582
1583// binary operators {{{
1584namespace Detail
1585{
// Binary operators for fixed_size_simd. Two overloads per operator: for
// atomic SimdArrays (single native vector) the op runs on the internal
// vector; otherwise it is applied to both halves. The first macro covers
// arithmetic/bitwise/shift ops (returning a vector), the second covers
// comparisons (returning a mask).
1586#define Vc_FIXED_OP(op) \
1587 template <class T, int N, \
1588 class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type> \
1589 Vc_INTRINSIC fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a, \
1590 const fixed_size_simd<T, N> &b) \
1591 { \
1592 return {private_init, internal_data(a) op internal_data(b)}; \
1593 } \
1594 template <class T, int N, \
1595 class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type, \
1596 class = T> \
1597 Vc_INTRINSIC fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a, \
1598 const fixed_size_simd<T, N> &b) \
1599 { \
1600 return {internal_data0(a) op internal_data0(b), \
1601 internal_data1(a) op internal_data1(b)}; \
1602 }
1603Vc_ALL_ARITHMETICS(Vc_FIXED_OP);
1604Vc_ALL_BINARY(Vc_FIXED_OP);
1605Vc_ALL_SHIFTS(Vc_FIXED_OP);
1606#undef Vc_FIXED_OP
1607#define Vc_FIXED_OP(op) \
1608 template <class T, int N, \
1609 class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type> \
1610 Vc_INTRINSIC fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a, \
1611 const fixed_size_simd<T, N> &b) \
1612 { \
1613 return {private_init, internal_data(a) op internal_data(b)}; \
1614 } \
1615 template <class T, int N, \
1616 class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type, \
1617 class = T> \
1618 Vc_INTRINSIC fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a, \
1619 const fixed_size_simd<T, N> &b) \
1620 { \
1621 return {internal_data0(a) op internal_data0(b), \
1622 internal_data1(a) op internal_data1(b)}; \
1623 }
1624Vc_ALL_COMPARES(Vc_FIXED_OP);
1625#undef Vc_FIXED_OP
1626} // namespace Detail
1627
1628// }}}
1629// binary operators {{{1
namespace result_vector_type_internal
1631{
1632template <typename T>
1633using remove_cvref = typename std::remove_cv<typename std::remove_reference<T>::type>::type;
1634
// True for integral types that could promote to 64-bit: such operands
// disable the mixed-type operators below (no implicit demotion).
1635template <typename T>
1636using is_integer_larger_than_int = std::integral_constant<
1637 bool, std::is_integral<T>::value &&(sizeof(T) > sizeof(int) ||
1638 std::is_same<T, long>::value ||
1639 std::is_same<T, unsigned long>::value)>;
1640
// Primary template: the bool parameter encodes whether a mixed
// SimdArray-vs-scalar/Vector operator should exist for operands L and R.
1641template <
1642 typename L, typename R,
1643 std::size_t N = Traits::isSimdArray<L>::value ? Traits::simd_vector_size<L>::value
1644 : Traits::simd_vector_size<R>::value,
1645 bool = (Traits::isSimdArray<L>::value ||
1646 Traits::isSimdArray<R>::value) && // one of the operands must be a SimdArray
1647 !(Traits::is_fixed_size_simd<L>::value && // if both are fixed_size, use
1648 Traits::is_fixed_size_simd<R>::value) && // common/operators.h
1649 ((std::is_arithmetic<remove_cvref<L>>::value && // one of the operands is a
1650 !is_integer_larger_than_int<remove_cvref<L>>::value) || // scalar type
1651 (std::is_arithmetic<remove_cvref<R>>::value &&
1652 !is_integer_larger_than_int<remove_cvref<R>>::value) ||
1653 // or one of the operands is Vector<T> with Vector<T>::size() ==
1654 // SimdArray::size()
1655 Traits::simd_vector_size<L>::value == Traits::simd_vector_size<R>::value)>
1656struct evaluate;
1657
1658template <typename L, typename R, std::size_t N> struct evaluate<L, R, N, true>
1659{
1660private:
1661 using LScalar = Traits::entry_type_of<L>;
1662 using RScalar = Traits::entry_type_of<R>;
1663
1664 template <bool B, typename T, typename F>
1665 using conditional = typename std::conditional<B, T, F>::type;
1666
1667public:
1668 // In principle we want the exact same rules for SimdArray<T> ⨉ SimdArray<U> as the standard
1669 // defines for T ⨉ U. BUT: short ⨉ short returns int (because all integral types smaller than
1670 // int are promoted to int before any operation). This would imply that SIMD types with integral
1671 // types smaller than int are more or less useless - and you could use SimdArray<int> from the
1672 // start. Therefore we special-case those operations where the scalar type of both operands is
1673 // integral and smaller than int.
1674 // In addition, there is no generic support for 64-bit int SIMD types. Therefore
1675 // promotion to a 64-bit integral type (including `long` because it can potentially have 64
1676 // bits) also is not done. But if one of the operands is a scalar type that is larger than int
1677 // then the operator is disabled altogether. We do not want an implicit demotion.
1678 using type = fixed_size_simd<
1679 conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
1680 sizeof(LScalar) < sizeof(int) &&
1681 sizeof(RScalar) < sizeof(int)),
1682 conditional<(sizeof(LScalar) == sizeof(RScalar)),
1683 conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
1684 conditional<(sizeof(LScalar) > sizeof(RScalar)), LScalar, RScalar>>,
1685 decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
1686 N>;
1687};
1688
1689} // namespace result_vector_type_internal
1690
// The fixed_size_simd type that results from applying a binary operator to
// operands L and R (SFINAE: only defined when the operator should exist).
1691template <typename L, typename R>
1692using result_vector_type = typename result_vector_type_internal::evaluate<L, R>::type;
1693
1693
// Mixed-type binary operators (SimdArray vs. scalar or Vector): both operands
// are converted to the common result_vector_type, then the homogeneous
// operator from Detail is applied. The second macro does the same for
// comparisons, returning the result type's mask.
1694#define Vc_BINARY_OPERATORS_(op_) \
1695 \
1696 template <typename L, typename R> \
1697 Vc_INTRINSIC result_vector_type<L, R> operator op_(L &&lhs, R &&rhs) \
1698 { \
1699 using Return = result_vector_type<L, R>; \
1700 return Vc::Detail::operator op_( \
1701 static_cast<const Return &>(std::forward<L>(lhs)), \
1702 static_cast<const Return &>(std::forward<R>(rhs))); \
1703 }
1722Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_);
1723Vc_ALL_BINARY(Vc_BINARY_OPERATORS_);
1725#undef Vc_BINARY_OPERATORS_
1726#define Vc_BINARY_OPERATORS_(op_) \
1727 \
1728 template <typename L, typename R> \
1729 Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op_(L &&lhs, \
1730 R &&rhs) \
1731 { \
1732 using Promote = result_vector_type<L, R>; \
1733 return Promote(std::forward<L>(lhs)) op_ Promote(std::forward<R>(rhs)); \
1734 }
1753Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_);
1755#undef Vc_BINARY_OPERATORS_
1756
1756
1757// math functions {{{1
// Forwarders for unary math functions on SimdArray / fixed_size_simd: each
// generated function delegates to fromOperation with the matching
// Forward_##name_ operation tag, which applies the native function per half.
1758#define Vc_FORWARD_UNARY_OPERATOR(name_) \
1759 \
1760 template <typename T, std::size_t N, typename V, std::size_t M> \
1761 inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x) \
1762 { \
1763 return fixed_size_simd<T, N>::fromOperation( \
1764 Common::Operations::Forward_##name_(), x); \
1765 } \
1766 template <class T, int N> \
1767 fixed_size_simd<T, N> name_(const fixed_size_simd<T, N> &x) \
1768 { \
1769 return fixed_size_simd<T, N>::fromOperation( \
1770 Common::Operations::Forward_##name_(), x); \
1771 } \
1772 Vc_NOTHING_EXPECTING_SEMICOLON
1773
// Same, for predicates that return a mask (isnan, isinf, ...).
1774#define Vc_FORWARD_UNARY_BOOL_OPERATOR(name_) \
1775 \
1776 template <typename T, std::size_t N, typename V, std::size_t M> \
1777 inline fixed_size_simd_mask<T, N> name_(const SimdArray<T, N, V, M> &x) \
1778 { \
1779 return fixed_size_simd_mask<T, N>::fromOperation( \
1780 Common::Operations::Forward_##name_(), x); \
1781 } \
1782 template <class T, int N> \
1783 fixed_size_simd_mask<T, N> name_(const fixed_size_simd<T, N> &x) \
1784 { \
1785 return fixed_size_simd_mask<T, N>::fromOperation( \
1786 Common::Operations::Forward_##name_(), x); \
1787 } \
1788 Vc_NOTHING_EXPECTING_SEMICOLON
1789
// Same, for two-argument math functions (atan2, copysign, min, max).
1790#define Vc_FORWARD_BINARY_OPERATOR(name_) \
1791 \
1792 template <typename T, std::size_t N, typename V, std::size_t M> \
1793 inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x, \
1794 const SimdArray<T, N, V, M> &y) \
1795 { \
1796 return fixed_size_simd<T, N>::fromOperation( \
1797 Common::Operations::Forward_##name_(), x, y); \
1798 } \
1799 Vc_NOTHING_EXPECTING_SEMICOLON
1800
1806Vc_FORWARD_UNARY_OPERATOR(abs);
1807Vc_FORWARD_UNARY_OPERATOR(asin);
1808Vc_FORWARD_UNARY_OPERATOR(atan);
1809Vc_FORWARD_BINARY_OPERATOR(atan2);
1810Vc_FORWARD_UNARY_OPERATOR(ceil);
1811Vc_FORWARD_BINARY_OPERATOR(copysign);
1812Vc_FORWARD_UNARY_OPERATOR(cos);
1813Vc_FORWARD_UNARY_OPERATOR(exp);
1814Vc_FORWARD_UNARY_OPERATOR(exponent);
1815Vc_FORWARD_UNARY_OPERATOR(floor);
// fma, frexp, ldexp and sincos need their own signatures (extra arguments).
// NOTE(review): the signature start lines of fma/frexp/ldexp/sincos (doc
// lines 1818, 1829, 1835, 1848) are not visible in this extract.
1817template <typename T, std::size_t N>
1819 const SimdArray<T, N> &c)
1820{
1821 return SimdArray<T, N>::fromOperation(Common::Operations::Forward_fma(), a, b, c);
1822}
1823Vc_FORWARD_UNARY_BOOL_OPERATOR(isfinite);
1824Vc_FORWARD_UNARY_BOOL_OPERATOR(isinf);
1825Vc_FORWARD_UNARY_BOOL_OPERATOR(isnan);
1826Vc_FORWARD_UNARY_BOOL_OPERATOR(isnegative);
1828template <typename T, std::size_t N>
1830{
1831 return SimdArray<T, N>::fromOperation(Common::Operations::Forward_frexp(), x, e);
1832}
1834template <typename T, std::size_t N>
1836{
1837 return SimdArray<T, N>::fromOperation(Common::Operations::Forward_ldexp(), x, e);
1838}
1839Vc_FORWARD_UNARY_OPERATOR(log);
1840Vc_FORWARD_UNARY_OPERATOR(log10);
1841Vc_FORWARD_UNARY_OPERATOR(log2);
1842Vc_FORWARD_UNARY_OPERATOR(reciprocal);
1843Vc_FORWARD_UNARY_OPERATOR(round);
1844Vc_FORWARD_UNARY_OPERATOR(rsqrt);
1845Vc_FORWARD_UNARY_OPERATOR(sin);
1847template <typename T, std::size_t N>
1849{
1850 SimdArray<T, N>::callOperation(Common::Operations::Forward_sincos(), x, sin, cos);
1851}
1852Vc_FORWARD_UNARY_OPERATOR(sqrt);
1853Vc_FORWARD_UNARY_OPERATOR(trunc);
1854Vc_FORWARD_BINARY_OPERATOR(min);
1855Vc_FORWARD_BINARY_OPERATOR(max);
1857#undef Vc_FORWARD_UNARY_OPERATOR
1858#undef Vc_FORWARD_UNARY_BOOL_OPERATOR
1859#undef Vc_FORWARD_BINARY_OPERATOR
1860
1860
1861// simd_cast {{{1
// MSVC needs distinct dummy default parameters to tell otherwise ambiguous
// simd_cast overloads apart; on other compilers they expand to nothing.
1862#ifdef Vc_MSVC
1863#define Vc_DUMMY_ARG0 , int = 0
1864#define Vc_DUMMY_ARG1 , long = 0
1865#define Vc_DUMMY_ARG2 , short = 0
1866#define Vc_DUMMY_ARG3 , char = '0'
1867#define Vc_DUMMY_ARG4 , unsigned = 0u
1868#define Vc_DUMMY_ARG5 , unsigned short = 0u
1869#else
1870#define Vc_DUMMY_ARG0
1871#define Vc_DUMMY_ARG1
1872#define Vc_DUMMY_ARG2
1873#define Vc_DUMMY_ARG3
1874#define Vc_DUMMY_ARG4
1875#define Vc_DUMMY_ARG5
1876#endif // Vc_MSVC
1877
1878// simd_cast_impl_smaller_input {{{2
1879// The following function can be implemented without the sizeof...(From) overload.
1880// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
1881// function in two works around the issue.
// Cast where each input (N elements each) is smaller than Return: cast the
// leading inputs natively, then copy `last` element-by-element into the tail.
1882template <typename Return, std::size_t N, typename T, typename... From>
1883Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return>
1884simd_cast_impl_smaller_input(const From &... xs, const T &last)
1885{
1886 Return r = simd_cast<Return>(xs...);
1887 for (size_t i = 0; i < N; ++i) {
1888 r[i + N * sizeof...(From)] = static_cast<typename Return::EntryType>(last[i]);
1889 }
1890 return r;
1891}
// Single-input variant of the above (ICC ICE workaround; see comment above).
1892template <typename Return, std::size_t N, typename T>
1893Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(const T &last)
1894{
1895 Return r = Return();
1896 for (size_t i = 0; i < N; ++i) {
1897 r[i] = static_cast<typename Return::EntryType>(last[i]);
1898 }
1899 return r;
1900}
// Cast where each input is larger than needed: only the elements up to
// Return::Size are copied from `last`.
1901template <typename Return, std::size_t N, typename T, typename... From>
1902Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
1903 const From &... xs, const T &last)
1904{
1905 Return r = simd_cast<Return>(xs...);
1906 for (size_t i = N * sizeof...(From); i < Return::Size; ++i) {
1907 r[i] = static_cast<typename Return::EntryType>(last[i - N * sizeof...(From)]);
1908 }
1909 return r;
1910}
// Single-input variant of the larger-input cast.
1911template <typename Return, std::size_t N, typename T>
1912Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(const T &last)
1913{
1914 Return r = Return();
1915 for (size_t i = 0; i < Return::size(); ++i) {
1916 r[i] = static_cast<typename Return::EntryType>(last[i]);
1917 }
1918 return r;
1919}
1920
1920
1921// simd_cast_without_last (declaration) {{{2
// Casts all arguments except the trailing one (defined later in this file).
1922template <typename Return, typename T, typename... From>
1923Vc_INTRINSIC_L Vc_CONST_L Return
1924 simd_cast_without_last(const From &... xs, const T &) Vc_INTRINSIC_R Vc_CONST_R;
1925
1926// are_all_types_equal {{{2
// Recursive trait: true iff every type in the pack is the same type.
1927template <typename... Ts> struct are_all_types_equal;
1928template <typename T>
1929struct are_all_types_equal<T> : public std::integral_constant<bool, true>
1930{
1931};
1932template <typename T0, typename T1, typename... Ts>
1933struct are_all_types_equal<T0, T1, Ts...>
1934 : public std::integral_constant<
1935 bool, std::is_same<T0, T1>::value && are_all_types_equal<T1, Ts...>::value>
1936{
1937};
1938
1938
1939// simd_cast_interleaved_argument_order (declarations) {{{2
// Casts 2n arguments a0..an-1, b0..bn-1 in interleaved order a0 b0 a1 b1 ...
// (definition later in the file).
1959template <typename Return, typename... Ts>
1960Vc_INTRINSIC Vc_CONST Return
1961 simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b);
1962
1963// simd_cast_with_offset (declarations and one impl) {{{2
// Overload set: casts starting at element `offset` of the concatenated
// inputs. The cases are distinguished by how offset relates to Return::Size
// and whether Return is an atomic simd(mask)array.
1964// offset == 0 {{{3
1965template <typename Return, std::size_t offset, typename From, typename... Froms>
1966Vc_INTRINSIC Vc_CONST
1967 enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
1968 simd_cast_with_offset(const From &x, const Froms &... xs);
1969// offset > 0 && offset divisible by Return::Size {{{3
1970template <typename Return, std::size_t offset, typename From>
1971Vc_INTRINSIC Vc_CONST
1972 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
1973 simd_cast_with_offset(const From &x);
1974// offset > 0 && offset NOT divisible && Return is non-atomic simd(mask)array {{{3
1975template <typename Return, std::size_t offset, typename From>
1976Vc_INTRINSIC Vc_CONST
1977 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1978 ((Traits::isSimdArray<Return>::value &&
1979 !Traits::isAtomicSimdArray<Return>::value) ||
1980 (Traits::isSimdMaskArray<Return>::value &&
1981 !Traits::isAtomicSimdMaskArray<Return>::value))),
1982 Return>
1983 simd_cast_with_offset(const From &x);
1984// offset > 0 && offset NOT divisible && Return is atomic simd(mask)array {{{3
1985template <typename Return, std::size_t offset, typename From>
1986Vc_INTRINSIC Vc_CONST
1987 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1988 ((Traits::isSimdArray<Return>::value &&
1989 Traits::isAtomicSimdArray<Return>::value) ||
1990 (Traits::isSimdMaskArray<Return>::value &&
1991 Traits::isAtomicSimdMaskArray<Return>::value))),
1992 Return>
1993 simd_cast_with_offset(const From &x);
1994// offset > first argument (drops first arg) {{{3
1995template <typename Return, std::size_t offset, typename From, typename... Froms>
1996Vc_INTRINSIC Vc_CONST enable_if<
1997 (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
1998 simd_cast_with_offset(const From &, const Froms &... xs)
1999{
2000 return simd_cast_with_offset<Return, offset - From::Size>(xs...);
2001}
2002
2003// offset > first and only argument (returns Zero) {{{3
2004template <typename Return, std::size_t offset, typename From>
2005Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
2006 const From &)
2007{
2008 return Return(0);
2009}
2010
2010
2011// first_type_of {{{2
// Yields the first type of a non-empty parameter pack.
2012template <typename T, typename... Ts> struct first_type_of_impl
2013{
2014 using type = T;
2015};
2016template <typename... Ts> using first_type_of = typename first_type_of_impl<Ts...>::type;
2017
2018// simd_cast_drop_arguments (declarations) {{{2
// Overload set that drops trailing arguments which do not contribute any
// elements to Return (definitions later in the file).
2019template <typename Return, typename From>
2020Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
2021template <typename Return, typename... Froms>
2022Vc_INTRINSIC Vc_CONST
2023 enable_if<(are_all_types_equal<Froms...>::value &&
2024 sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
2025 Return>
2026 simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);
2027// The following function can be implemented without the sizeof...(From) overload.
2028// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
2029// function in two works around the issue.
2030template <typename Return, typename From, typename... Froms>
2031Vc_INTRINSIC Vc_CONST enable_if<
2032 (are_all_types_equal<From, Froms...>::value &&
2033 (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
2034 Return>
2035simd_cast_drop_arguments(Froms... xs, From x, From);
2036template <typename Return, typename From>
2037Vc_INTRINSIC Vc_CONST
2038 enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
2039 simd_cast_drop_arguments(From x, From);
2040
2040
namespace
{
#ifdef Vc_DEBUG_SIMD_CAST
// Sink that forces evaluation of the stream insertions expanded in the
// initializer list below; the collected pointers themselves are unused.
void debugDoNothing(const std::initializer_list<void *> &) {}
// Prints "prefix arg0, arg1, ..., argN suffix" to stderr; used by the
// simd_cast overloads to trace which overload was selected.
template <typename T0, typename... Ts>
inline void vc_debug_(const char *prefix, const char *suffix, const T0 &arg0,
                      const Ts &... args)
{
    std::cerr << prefix << arg0;
    debugDoNothing({&(std::cerr << ", " << args)...});
    std::cerr << suffix;
}
#else
// Debugging disabled (the default): vc_debug_ is an empty inline function and
// compiles away completely.
template <typename T0, typename... Ts>
Vc_INTRINSIC void vc_debug_(const char *, const char *, const T0 &, const Ts &...)
{
}
#endif
} // unnamed namespace
2060
// is_less trait{{{2
// Compile-time "A < B" wrapped in an integral_constant so it can be used for
// SFINAE/tag dispatch in the simd_cast overload set below.
template <size_t A, size_t B>
struct is_less : public std::integral_constant<bool, (B > A)> {
};
2065
// is_power_of_2 trait{{{2
// True when N has at most one bit set. Note that N == 0 also passes this test
// (0 & (0 - 1) == 0); callers only use it with nonzero sizes.
template <size_t N>
struct is_power_of_2 : public std::integral_constant<bool, (N & (N - 1)) == 0> {
};
2070
// simd_cast<T>(xs...) to SimdArray/-mask {{{2
// Generates the simd_cast overloads converting native Vc::Vector/Vc::Mask
// arguments into SimdArray/SimdMaskArray. Four overloads per instantiation:
// {1} atomic Return, inputs do not over-cover Return::Size: cast all inputs
//     into the single storage member.
// {2} atomic Return, inputs over-cover Return::Size: drop the last input and
//     retry.
// {3} non-atomic Return, inputs reach past the left half: fill both storage
//     halves (left via drop_arguments, right via with_offset).
// {4} non-atomic Return, inputs fit in the left half: right half is zero.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&     \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value &&                      \
         !detail::is_fixed_size_abi<A>::value),                                          \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{1}(", ")\n", x, xs...);                                     \
        return {private_init, simd_cast<typename Return::storage_type>(x, xs...)};       \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         !is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&    \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value &&                      \
         !detail::is_fixed_size_abi<A>::value),                                          \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{2}(", ")\n", x, xs...);                                     \
        return {simd_cast_without_last<Return, NativeType_<T, A>, Froms...>(x, xs...)};  \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   is_less<Common::left_size<Return::Size>(),                            \
                           NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&    \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value &&            \
                   !detail::is_fixed_size_abi<A>::value),                                \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x, Froms... xs)                                      \
    {                                                                                    \
        vc_debug_("simd_cast{3}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast_drop_arguments<R0, Froms...>(x, xs...),                        \
                simd_cast_with_offset<R1, R0::Size>(x, xs...)};                          \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   !is_less<Common::left_size<Return::Size>(),                           \
                            NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&   \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value &&            \
                   !detail::is_fixed_size_abi<A>::value),                                \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x, Froms... xs)                                      \
    {                                                                                    \
        vc_debug_("simd_cast{4}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast<R0>(x, xs...), R1(0)};                                         \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2135
// simd_cast<SimdArray/-mask, offset>(V) {{{2
// Generates the offset-taking simd_cast overloads from a native Vector/Mask
// into SimdArray/SimdMaskArray. The offset is counted in multiples of
// Return::Size elements of the source.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    /* SIMD Vector/Mask to atomic SimdArray/simdmaskarray */                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<Traits::isAtomic##SimdArrayType_<Return>::value, Return>               \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG0)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x);                       \
        return {private_init, simd_cast<typename Return::storage_type, offset>(x)};      \
    }                                                                                    \
    /* both halves of Return array are extracted from argument */                        \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() <           \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG1)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x);                 \
        using R0 = typename Return::storage_type0;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        constexpr int entries_offset_right = entries_offset + R0::Size;                  \
        return {                                                                         \
            simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x),    \
            simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
                x)};                                                                     \
    }                                                                                    \
    /* SIMD Vector/Mask to non-atomic SimdArray/simdmaskarray */                         \
    /* right half of Return array is zero */                                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() >=          \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG2)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x);                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        return {simd_cast_with_offset<R0, entries_offset>(x), R1(0)};                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2188
// simd_cast<T>(xs...) from SimdArray/-mask {{{2
// Generates the simd_cast overloads converting *from* SimdArray/SimdMaskArray
// arguments. The overloads are selected on whether the source array is atomic
// (N == M, "indivisible"), bisectable (N a power of two, split into halves via
// internal_data0/1), or irregular (N not a power of two, handled by the
// simd_cast_impl_{smaller,larger}_input helpers).
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
    /* indivisible SimdArrayType_ */                                                     \
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value &&    \
                   (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) &&       \
                   !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value),            \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs)              \
    {                                                                                    \
        vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...);                          \
        return simd_cast<Return>(internal_data(x0), internal_data(xs)...);               \
    }                                                                                    \
    /* indivisible SimdArrayType_ && can drop arguments from the end */                  \
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value &&    \
                   (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) &&     \
                   !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value),            \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs)              \
    {                                                                                    \
        vc_debug_("simd_cast{indivisible2}(", ")\n", x0, xs...);                         \
        return simd_cast_without_last<Return,                                            \
                                      typename SimdArrayType_<T, N, V, N>::storage_type, \
                                      typename From::storage_type...>(                   \
            internal_data(x0), internal_data(xs)...);                                    \
    }                                                                                    \
    /* bisectable SimdArrayType_ (N = 2^n) && never too large */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         !std::is_same<Return, SimdArrayType_<T, N, V, M>>::value &&                     \
         is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value),  \
        Return>                                                                          \
        simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)              \
    {                                                                                    \
        vc_debug_("simd_cast{bisectable}(", ")\n", x0, xs...);                           \
        return simd_cast_interleaved_argument_order<                                     \
            Return, typename SimdArrayType_<T, N, V, M>::storage_type0,                  \
            typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)...,  \
                                             internal_data1(x0), internal_data1(xs)...); \
    }                                                                                    \
    /* bisectable SimdArrayType_ (N = 2^n) && input so large that at least the last    \
     * input can be dropped */                                                           \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         !is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
        Return>                                                                          \
        simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)              \
    {                                                                                    \
        vc_debug_("simd_cast{bisectable2}(", ")\n", x0, xs...);                          \
        return simd_cast_without_last<Return, SimdArrayType_<T, N, V, M>, From...>(      \
            x0, xs...);                                                                  \
    }                                                                                    \
    /* remaining SimdArrayType_ input never larger (N != 2^n) */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         N * (1 + sizeof...(From)) <= Return::Size && !is_power_of_2<N>::value),         \
        Return>                                                                          \
        simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)              \
    {                                                                                    \
        vc_debug_("simd_cast{remaining}(", ")\n", x0, xs...);                            \
        return simd_cast_impl_smaller_input<Return, N, SimdArrayType_<T, N, V, M>,       \
                                            From...>(x0, xs...);                         \
    }                                                                                    \
    /* remaining SimdArrayType_ input larger (N != 2^n) */                               \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         N * (1 + sizeof...(From)) > Return::Size && !is_power_of_2<N>::value),          \
        Return>                                                                          \
        simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)              \
    {                                                                                    \
        vc_debug_("simd_cast{remaining2}(", ")\n", x0, xs...);                           \
        return simd_cast_impl_larger_input<Return, N, SimdArrayType_<T, N, V, M>,        \
                                           From...>(x0, xs...);                          \
    }                                                                                    \
    /* a single bisectable SimdArrayType_ (N = 2^n) too large */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && N >= 2 * Return::Size && is_power_of_2<N>::value), Return>  \
        simd_cast(const SimdArrayType_<T, N, V, M> &x)                                   \
    {                                                                                    \
        vc_debug_("simd_cast{single bisectable}(", ")\n", x);                            \
        return simd_cast<Return>(internal_data0(x));                                     \
    }                                                                                    \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
    Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size &&                       \
                                     N < 2 * Return::Size && is_power_of_2<N>::value),   \
                                    Return>                                              \
        simd_cast(const SimdArrayType_<T, N, V, M> &x)                                   \
    {                                                                                    \
        vc_debug_("simd_cast{single bisectable2}(", ")\n", x);                           \
        return simd_cast<Return>(internal_data0(x), internal_data1(x));                  \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
// fixed_size_simd<T, N> arguments are handled by casting each one to its
// SimdArray<T, N> base and reusing the SimdArray overloads above.
template <class Return, class T, int N, class... Ts,
          class = enable_if<!std::is_same<Return, fixed_size_simd<T, N>>::value>>
Vc_INTRINSIC Return simd_cast(const fixed_size_simd<T, N> &x, const Ts &... xs)
{
    return simd_cast<Return>(static_cast<const SimdArray<T, N> &>(x),
                             static_cast<const SimdArray<T, N> &>(xs)...);
}
// Same as above for fixed_size_simd_mask<T, N>: delegate to the
// SimdMaskArray<T, N> overloads via an upcast of every argument.
template <class Return, class T, int N, class... Ts,
          class = enable_if<!std::is_same<Return, fixed_size_simd_mask<T, N>>::value>>
Vc_INTRINSIC Return simd_cast(const fixed_size_simd_mask<T, N> &x, const Ts &... xs)
{
    return simd_cast<Return>(static_cast<const SimdMaskArray<T, N> &>(x),
                             static_cast<const SimdMaskArray<T, N> &>(xs)...);
}
2311
// simd_cast<T, offset>(SimdArray/-mask) {{{2
// Generates the offset-taking simd_cast overloads with a SimdArray /
// SimdMaskArray source. The offset selects which Return::Size-sized chunk of
// the source's elements is converted; overloads dispatch on whether that chunk
// lies in the left half, the right half, or straddles the boundary (scalar
// copy fallback).
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
    /* offset == 0 is like without offset */                                             \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG0)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x);                          \
        return simd_cast<Return>(x);                                                     \
    }                                                                                    \
    /* forward to V */                                                                   \
    template <typename Return, int offset, typename T, std::size_t N, typename V>        \
    Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, N> &x Vc_DUMMY_ARG1)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x);                      \
        return simd_cast<Return, offset>(internal_data(x));                              \
    }                                                                                    \
    /* convert from right member of SimdArray */                                         \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size == 0),           \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG2)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right}(", ")\n", offset, x);                        \
        return simd_cast<Return, offset - Common::left_size<N>() / Return::Size>(        \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* same as above except for odd cases where offset * Return::Size doesn't fit the  \
     * left side of the SimdArray */                                                     \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size != 0),           \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG3)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x);                 \
        return simd_cast_with_offset<Return,                                             \
                                     offset * Return::Size - Common::left_size<N>()>(    \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* convert from left member of SimdArray */                                          \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && /*offset * Return::Size < Common::left_size<N>() &&*/                 \
         offset != 0 && (offset + 1) * Return::Size <= Common::left_size<N>()),          \
        Return>                                                                          \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG4)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, left}(", ")\n", offset, x);                         \
        return simd_cast<Return, offset>(internal_data0(x));                             \
    }                                                                                    \
    /* fallback to copying scalars */                                                    \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && (offset * Return::Size < Common::left_size<N>()) &&         \
                   offset != 0 && (offset + 1) * Return::Size > Common::left_size<N>()), \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG5)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x);                 \
        using R = typename Return::EntryType;                                            \
        Return r = Return(0);                                                            \
        for (std::size_t i = offset * Return::Size;                                      \
             i < std::min(N, (offset + 1) * Return::Size); ++i) {                        \
            r[i - offset * Return::Size] = static_cast<R>(x[i]);                         \
        }                                                                                \
        return r;                                                                        \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
// simd_cast_drop_arguments (definitions) {{{2
// Single argument: nothing to drop, plain conversion.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x)
{
    return simd_cast<Return>(x);
}
// All given arguments are needed to fill Return: forward everything to
// simd_cast (xs... first, then the trailing argument x, preserving order).
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
    simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
{
    return simd_cast<Return>(xs..., x);
}
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
// Too many inputs: the unnamed trailing From is dropped and the recursion
// continues until the remaining arguments fit Return::Size.
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From)
{
    return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
}
// Exactly two arguments and the first one already covers Return::Size: the
// second (unnamed) argument is discarded.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From)
{
    return simd_cast_drop_arguments<Return>(x);
}
2427
// simd_cast_with_offset (definitions) {{{2
// offset is a positive multiple of Return::Size: translate the element offset
// into a chunk index and forward to the offset-taking simd_cast.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0),
              Return> simd_cast_with_offset(const From &x)
{
    return simd_cast<Return, offset / Return::Size>(x);
}
// offset not divisible by Return::Size and Return is split into two storage
// halves: convert each half independently with its own element offset.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 !Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 !Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x)
{
    using R0 = typename Return::storage_type0;
    using R1 = typename Return::storage_type1;
    return {simd_cast_with_offset<R0, offset>(x),
            simd_cast_with_offset<R1, offset + R0::Size>(x)};
}
// offset not divisible by Return::Size and Return is atomic: shift the source
// left by the remainder so the remaining offset becomes a whole number of
// Return-sized chunks, then use the offset-taking simd_cast.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x)
{
    return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
}
// offset == 0: no elements to skip, a plain simd_cast of all arguments.
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
    simd_cast_with_offset(const From &x, const Froms &... xs)
{
    return simd_cast<Return>(x, xs...);
}
2470
// simd_cast_without_last (definition) {{{2
// Converts all arguments except the trailing one (the unnamed T) to Return.
// Note the non-trailing parameter pack: callers must spell out the template
// arguments so From... and T are not deduced.
template <typename Return, typename T, typename... From>
Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(const From &... xs, const T &)
{
    return simd_cast<Return>(xs...);
}
2477
2478// simd_cast_interleaved_argument_order (definitions) {{{2
2479
#ifdef Vc_MSVC
// MSVC doesn't see that the Ts pack below can be empty and thus complains when extract_interleaved
// is called with only 2 arguments. These overloads here are *INCORRECT standard C++*, but they make
// MSVC do the right thing.
// Two-argument special case: I == 0 selects the first argument.
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0, const T0 &)
{
    return a0;
}
// Two-argument special case: I == 1 selects the second argument.
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &, const T0 &b0)
{
    return b0;
}
#endif // Vc_MSVC
2495
// Selects element I of the interleaved sequence a0, b0, a1, b1, ... where the
// arguments arrive as (a0, a..., b0, b...). I == 0 picks a0.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0,
                                                                  const Ts &...,
                                                                  const T0 &,
                                                                  const Ts &...)
{
    return a0;
}
// I == 1 picks b0, the first element of the second argument group.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &,
                                                                  const Ts &...,
                                                                  const T0 &b0,
                                                                  const Ts &...)
{
    return b0;
}
// I > 1: drop a0 and b0 (one interleaved pair) and recurse with I reduced by 2.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(const T0 &,
                                                                 const Ts &... a,
                                                                 const T0 &,
                                                                 const Ts &... b)
{
    return extract_interleaved<I - 2, Ts...>(a..., b...);
}
// Expands extract_interleaved over all Indexes, i.e. calls
// simd_cast<Return>(a0, b0, a1, b1, ...) from the grouped input (a..., b...).
template <typename Return, typename... Ts, std::size_t... Indexes>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>, const Ts &... a,
                                           const Ts &... b)
{
    return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
}
// Entry point: reorders the two argument groups (a..., b...) into interleaved
// order (a0, b0, a1, b1, ...) and converts the result to Return. Used by the
// bisectable SimdArray casts to pair up left and right storage halves.
template <typename Return, typename... Ts>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b)
{
    // one index per argument of the interleaved call
    using seq = make_index_sequence<sizeof...(Ts)*2>;
    return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...);
}
2540
// conditional_assign {{{1
// Generates one conditional_assign overload per compound-assignment Operator
// tag; each applies `lhs(mask) op_ rhs`, i.e. a write-masked assignment that
// only touches the elements selected by mask.
#define Vc_CONDITIONAL_ASSIGN(name_, op_)                                                \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M,  \
              typename U>                                                                \
    Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign(               \
        SimdArray<T, N, V, VN> &lhs, M &&mask, U &&rhs)                                  \
    {                                                                                    \
        lhs(mask) op_ rhs;                                                               \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(          Assign,  =);
Vc_CONDITIONAL_ASSIGN(      PlusAssign, +=);
Vc_CONDITIONAL_ASSIGN(     MinusAssign, -=);
Vc_CONDITIONAL_ASSIGN(  MultiplyAssign, *=);
Vc_CONDITIONAL_ASSIGN(    DivideAssign, /=);
Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
Vc_CONDITIONAL_ASSIGN(       XorAssign, ^=);
Vc_CONDITIONAL_ASSIGN(       AndAssign, &=);
Vc_CONDITIONAL_ASSIGN(        OrAssign, |=);
Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
#undef Vc_CONDITIONAL_ASSIGN
2563
// Generates the unary (increment/decrement) conditional_assign overloads.
// Unlike the binary variants above, these return the expression's result.
#define Vc_CONDITIONAL_ASSIGN(name_, expr_)                                              \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M>  \
    Vc_INTRINSIC enable_if<O == Operator::name_, SimdArray<T, N, V, VN>>                 \
    conditional_assign(SimdArray<T, N, V, VN> &lhs, M &&mask)                            \
    {                                                                                    \
        return expr_;                                                                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
#undef Vc_CONDITIONAL_ASSIGN
2577// transpose_impl {{{1
2578namespace Common
2579{
// 4x4 transpose for atomic SimdArrays (VectorSize == N): unwrap all inputs and
// outputs to their native vectors and delegate to the native transpose_impl.
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, N> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
                         SimdArray<T, N, V, N>, SimdArray<T, N, V, N>> &proxy)
{
    // Pointers to the wrapped native vectors of the four output arrays.
    V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
                            &internal_data(*r[2]), &internal_data(*r[3])};
    transpose_impl(TransposeTag<4, 4>(), &r2[0],
                   TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
                                              internal_data(std::get<1>(proxy.in)),
                                              internal_data(std::get<2>(proxy.in)),
                                              internal_data(std::get<3>(proxy.in))});
}
2594
// 2x4 transpose: four 2-element inputs produce two 4-element outputs. Every
// scalar slot is moved explicitly through the internal_data0/1 accessors.
template <typename T, typename V>
inline void transpose_impl(
    TransposeTag<2, 4>, SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
                         SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>> &proxy)
{
    auto &lo = *r[0];  // collects the first element of each input
    auto &hi = *r[1];  // collects the second element of each input
    internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
    internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
    internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
    internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
    internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
    internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
    internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
    internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
}
2612
// 4x4 transpose for single-element SimdArrays (N == 1, VectorSize == 1): same
// unwrap-and-delegate pattern as the atomic overload above.
template <typename T, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, 1, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>,
                         SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>> &proxy)
{
    // Pointers to the wrapped native vectors of the four output arrays.
    V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
                            &internal_data(*r[2]), &internal_data(*r[3])};
    transpose_impl(TransposeTag<4, 4>(), &r2[0],
                   TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
                                              internal_data(std::get<1>(proxy.in)),
                                              internal_data(std::get<2>(proxy.in)),
                                              internal_data(std::get<3>(proxy.in))});
}
2627
// 4x4 transpose for bisected SimdArrays (VectorSize == 1, N > 1): split the
// four outputs into two pairs and the inputs into their left/right halves,
// then perform two 2x4 transposes on SimdArray<T, 2> chunks.
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
                         SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>> &proxy)
{
    SimdArray<T, N, V, 1> *Vc_RESTRICT r0[4 / 2] = {r[0], r[1]};
    SimdArray<T, N, V, 1> *Vc_RESTRICT r1[4 / 2] = {r[2], r[3]};
    using H = SimdArray<T, 2>;
    // left halves of the inputs fill r0 ...
    transpose_impl(TransposeTag<2, 4>(), &r0[0],
                   TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
                                              internal_data0(std::get<1>(proxy.in)),
                                              internal_data0(std::get<2>(proxy.in)),
                                              internal_data0(std::get<3>(proxy.in))});
    // ... and right halves fill r1.
    transpose_impl(TransposeTag<2, 4>(), &r1[0],
                   TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
                                              internal_data1(std::get<1>(proxy.in)),
                                              internal_data1(std::get<2>(proxy.in)),
                                              internal_data1(std::get<3>(proxy.in))});
}
2648
2649/* TODO:
2650template <typename T, std::size_t N, typename V, std::size_t VSize>
2651inline enable_if<(N > VSize), void> transpose_impl(
2652 std::array<SimdArray<T, N, V, VSize> * Vc_RESTRICT, 4> & r,
2653 const TransposeProxy<SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>,
2654 SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>> &proxy)
2655{
2656 typedef SimdArray<T, N, V, VSize> SA;
2657 std::array<typename SA::storage_type0 * Vc_RESTRICT, 4> r0 = {
2658 {&internal_data0(*r[0]), &internal_data0(*r[1]), &internal_data0(*r[2]),
2659 &internal_data0(*r[3])}};
2660 transpose_impl(
2661 r0, TransposeProxy<typename SA::storage_type0, typename SA::storage_type0,
2662 typename SA::storage_type0, typename SA::storage_type0>{
2663 internal_data0(std::get<0>(proxy.in)),
2664 internal_data0(std::get<1>(proxy.in)),
2665 internal_data0(std::get<2>(proxy.in)),
2666 internal_data0(std::get<3>(proxy.in))});
2667
2668 std::array<typename SA::storage_type1 * Vc_RESTRICT, 4> r1 = {
2669 {&internal_data1(*r[0]), &internal_data1(*r[1]), &internal_data1(*r[2]),
2670 &internal_data1(*r[3])}};
2671 transpose_impl(
2672 r1, TransposeProxy<typename SA::storage_type1, typename SA::storage_type1,
2673 typename SA::storage_type1, typename SA::storage_type1>{
2674 internal_data1(std::get<0>(proxy.in)),
2675 internal_data1(std::get<1>(proxy.in)),
2676 internal_data1(std::get<2>(proxy.in)),
2677 internal_data1(std::get<3>(proxy.in))});
2678}
2679*/
2680} // namespace Common
2681
2682// }}}1
2683namespace Detail
2684{
// InterleaveImpl for SimdArrays {{{
// atomic {{{1
// An atomic SimdArray wraps exactly one native vector V, so (de)interleave is
// forwarded directly to the InterleaveImpl specialization for V after
// unwrapping every SimdArray argument with internal_data.
template <class T, size_t N, class V, size_t VSizeof>
struct InterleaveImpl<SimdArray<T, N, V, N>, N, VSizeof> {
    // Store the vectors vv... interleaved at the positions given by i.
    template <class I, class... VV>
    static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv)
    {
        InterleaveImpl<V, N, VSizeof>::interleave(data, i, internal_data(vv)...);
    }
    // Inverse operation: load interleaved data from memory into vv...
    template <class I, class... VV>
    static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... vv)
    {
        InterleaveImpl<V, N, VSizeof>::deinterleave(data, i, internal_data(vv)...);
    }
};
2700
2701// generic (TODO) {{{1
2702/*
2703template <class T, size_t N, class V, size_t Wt, size_t VSizeof>
2704struct InterleaveImpl<SimdArray<T, N, V, Wt>, N, VSizeof> {
2705 using SA = SimdArray<T, N, V, Wt>;
2706 using SA0 = typename SA::storage_type0;
2707 using SA1 = typename SA::storage_type1;
2708
2709 template <class I, class... VV>
2710 static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv)
2711 {
2712 InterleaveImpl<SA0, SA0::size(), sizeof(SA0)>::interleave(
2713 data, i, // i needs to be split
2714 internal_data0(vv)...);
2715 InterleaveImpl<SA1, SA1::size(), sizeof(SA1)>::interleave(
2716 data, // how far to advance data?
2717 i, // i needs to be split
2718 internal_data1(vv)...);
2719 }
2720 template <class I, class... VV>
2721 static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... vv)
2722 {
2723 InterleaveImpl<V, N, VSizeof>::deinterleave(data, i, internal_data(vv)...);
2724 }
2725};
2726*/
2727} // namespace Detail
2728// }}}
2730
2731} // namespace Vc_VERSIONED_NAMESPACE
2732
2733// numeric_limits {{{1
2734namespace std
2735{
2736template <typename T, size_t N, typename V, size_t VN>
2737struct numeric_limits<Vc::SimdArray<T, N, V, VN>> : public numeric_limits<T> {
2738private:
2740
2741public:
2742 static Vc_ALWAYS_INLINE Vc_CONST R max() noexcept { return numeric_limits<T>::max(); }
2743 static Vc_ALWAYS_INLINE Vc_CONST R min() noexcept { return numeric_limits<T>::min(); }
2744 static Vc_ALWAYS_INLINE Vc_CONST R lowest() noexcept
2745 {
2746 return numeric_limits<T>::lowest();
2747 }
2748 static Vc_ALWAYS_INLINE Vc_CONST R epsilon() noexcept
2749 {
2750 return numeric_limits<T>::epsilon();
2751 }
2752 static Vc_ALWAYS_INLINE Vc_CONST R round_error() noexcept
2753 {
2754 return numeric_limits<T>::round_error();
2755 }
2756 static Vc_ALWAYS_INLINE Vc_CONST R infinity() noexcept
2757 {
2758 return numeric_limits<T>::infinity();
2759 }
2760 static Vc_ALWAYS_INLINE Vc_CONST R quiet_NaN() noexcept
2761 {
2762 return numeric_limits<T>::quiet_NaN();
2763 }
2764 static Vc_ALWAYS_INLINE Vc_CONST R signaling_NaN() noexcept
2765 {
2766 return numeric_limits<T>::signaling_NaN();
2767 }
2768 static Vc_ALWAYS_INLINE Vc_CONST R denorm_min() noexcept
2769 {
2770 return numeric_limits<T>::denorm_min();
2771 }
2772};
2773} // namespace std
2774//}}}1
2775
2776#endif // VC_COMMON_SIMDARRAY_H_
2777
2778// vim: foldmethod=marker
The main SIMD mask class.
Definition mask.h:42
Data-parallel arithmetic type with user-defined number of elements.
Definition simdarray.h:617
fixed_size_simd< T, N > sorted() const
Return a sorted copy of the vector.
Definition simdarray.h:1361
fixed_size_simd< T, N > apply(F &&f) const
Call f on every entry of the vector and return the results as a new vector.
Definition simdarray.h:1114
Common::WriteMaskedVector< SimdArray, mask_type > operator()(const mask_type &mask)
Definition simdarray.h:1050
static fixed_size_simd< T, N > IndexesFromZero()
Returns a vector with the entries initialized to 0, 1, 2, 3, 4, 5, ...
Definition simdarray.h:713
fixed_size_simd< T, N > partialSum() const
Returns a vector containing the sum of all entries with smaller index.
Definition simdarray.h:1104
reference operator[](size_t i) noexcept
This operator can be used to modify scalar entries of the vector.
Definition simdarray.h:1035
fixed_size_simd< T, N > shifted(int amount) const
Shift vector entries to the left by amount; shifting in zeros.
Definition simdarray.h:1127
SimdArray()=default
Construct a zero-initialized vector object.
static fixed_size_simd< T, N > generate(const G &gen)
Generate a vector object from return values of gen (static variant of fill).
Definition simdarray.h:732
static constexpr std::size_t size()
Returns N, the number of scalar components in an object of this type.
Definition simdarray.h:675
SimdArray(value_type a)
Definition simdarray.h:755
value_type operator[](size_t index) const noexcept
This operator can be used to read scalar entries of the vector.
Definition simdarray.h:1042
static fixed_size_simd< T, N > One()
Returns a vector with the entries initialized to one.
Definition simdarray.h:707
fixed_size_simd< T, N > rotated(int amount) const
Rotate vector entries to the left by amount.
Definition simdarray.h:1256
static fixed_size_simd< T, N > Random()
Returns a vector with pseudo-random entries.
Definition simdarray.h:719
fixed_size_simd< T, N > apply(F &&f, const mask_type &k) const
Definition simdarray.h:1120
static fixed_size_simd< T, N > Zero()
Returns a vector with the entries initialized to zero.
Definition simdarray.h:701
fixed_size_simd< T, N > reversed() const
Returns a vector with all components reversed.
Definition simdarray.h:1339
value_type EntryType
The type of the elements (i.e. T)
Definition simdarray.h:684
fixed_size_simd< T, N > operator+() const
Returns a copy of itself.
Definition simdarray.h:960
T value_type
The type of the elements (i.e. T)
Definition simdarray.h:657
The main vector class for expressing data parallelism.
Definition vector.h:126
Vector sorted() const
Return a sorted copy of the vector.
Vector reversed() const
Returns a vector with all components reversed.
fixed_size_simd< T, N > max(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::max function component-wise and concurrently.
Definition simdarray.h:1855
fixed_size_simd< T, N > min(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::min function component-wise and concurrently.
Definition simdarray.h:1854
fixed_size_simd_mask< T, N > isinf(const SimdArray< T, N, V, M > &x)
Applies the std::isinf function component-wise and concurrently.
Definition simdarray.h:1824
fixed_size_simd< T, N > reciprocal(const SimdArray< T, N, V, M > &x)
Computes the reciprocal (1/x) component-wise and concurrently (Vc function; there is no std::reciprocal).
Definition simdarray.h:1842
SimdArray< T, N > fma(const SimdArray< T, N > &a, const SimdArray< T, N > &b, const SimdArray< T, N > &c)
Applies the std::fma function component-wise and concurrently.
Definition simdarray.h:1818
fixed_size_simd< T, N > copysign(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::copysign function component-wise and concurrently.
Definition simdarray.h:1811
fixed_size_simd_mask< T, N > isnegative(const SimdArray< T, N, V, M > &x)
Determines component-wise and concurrently whether each entry of x is negative (Vc function; there is no std::isnegative).
Definition simdarray.h:1826
void sincos(const SimdArray< T, N > &x, SimdArray< T, N > *sin, SimdArray< T, N > *cos)
Determines sine and cosine concurrently and component-wise on x.
Definition simdarray.h:1848
SimdArray< T, N > frexp(const SimdArray< T, N > &x, SimdArray< int, N > *e)
Applies the std::frexp function component-wise and concurrently.
Definition simdarray.h:1829
fixed_size_simd< T, N > rsqrt(const SimdArray< T, N, V, M > &x)
Computes the reciprocal square root (1/sqrt(x)) component-wise and concurrently (Vc function; there is no std::rsqrt).
Definition simdarray.h:1844
SimdArray< T, N > ldexp(const SimdArray< T, N > &x, const SimdArray< int, N > &e)
Applies the std::ldexp function component-wise and concurrently.
Definition simdarray.h:1835
fixed_size_simd< T, N > exponent(const SimdArray< T, N, V, M > &x)
Extracts the floating-point exponent component-wise and concurrently (Vc function; there is no std::exponent).
Definition simdarray.h:1814
Adapter< S, T, N > shifted(const Adapter< S, T, N > &a, int shift)
Returns a new vectorized object where each entry is shifted by shift.
Definition simdize.h:1069
constexpr VectorSpecialInitializerIndexesFromZero IndexesFromZero
The special object Vc::IndexesFromZero can be used to construct Vector objects initialized to values ...
Definition types.h:91
constexpr VectorSpecialInitializerOne One
The special object Vc::One can be used to construct Vector and Mask objects initialized to one/true.
Definition types.h:86
constexpr UnalignedTag Unaligned
Use this object for a flags parameter to request unaligned loads and stores.
constexpr AlignedTag Aligned
Use this object for a flags parameter to request aligned loads and stores.
std::pair< V, V > interleave(const V &a, const V &b)
Interleaves the entries from a and b into two vectors of the same type.
Definition interleave.h:55
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/fals...
Definition types.h:81
void deinterleave(V *a, V *b, const M *memory, A align)
constexpr std::size_t MemoryAlignment
Specifies the most conservative memory alignment necessary for aligned loads and stores of Vector typ...
Definition vector.h:215
Vector Classes Namespace.
Definition dox.h:585
UnalignedTag DefaultLoadTag
The default load tag type uses unaligned (non-streaming) loads.
To simd_cast()
A cast from nothing results in default-initialization of To.
Definition simd_cast.h:64
void assign(SimdizeDetail::Adapter< S, T, N > &a, size_t i, const S &x)
Assigns one scalar object x to a SIMD slot at offset i in the simdized object a.
Definition simdize.h:1221