Vc 1.4.5
SIMD Vector Classes for C++
 
Loading...
Searching...
No Matches
simdarrayhelper.h
1/* This file is part of the Vc library. {{{
2Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
3
4Redistribution and use in source and binary forms, with or without
5modification, are permitted provided that the following conditions are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 * Neither the names of contributing organizations nor the
12 names of its contributors may be used to endorse or promote products
13 derived from this software without specific prior written permission.
14
15THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
26}}}*/
27
28#ifndef VC_COMMON_SIMDARRAYHELPER_H_
29#define VC_COMMON_SIMDARRAYHELPER_H_
30
31#include "macros.h"
32
33namespace Vc_VERSIONED_NAMESPACE
34{
35// private_init {{{
namespace
{
/// Empty tag type without any data members.
struct private_init_t {
};
/// Constant instance of the tag type. It has internal linkage, i.e. every translation
/// unit that includes this header gets its own copy.
/// NOTE(review): presumably used to select non-public constructors - confirm at the use
/// sites, they are not visible in this file.
static constexpr private_init_t private_init{};
}  // unnamed namespace
40// }}}
41
42namespace Common
43{
44
47
48namespace Operations/*{{{*/
49{
50struct tag {};
51#define Vc_DEFINE_OPERATION(name_) \
52 struct name_ : public tag { \
53 template <typename V, typename... Args> \
54 Vc_INTRINSIC void operator()(V &v, Args &&... args) \
55 { \
56 v.name_(std::forward<Args>(args)...); \
57 } \
58 }
59Vc_DEFINE_OPERATION(gather);
60Vc_DEFINE_OPERATION(scatter);
61Vc_DEFINE_OPERATION(load);
62Vc_DEFINE_OPERATION(store);
63Vc_DEFINE_OPERATION(setZero);
64Vc_DEFINE_OPERATION(setZeroInverted);
65Vc_DEFINE_OPERATION(assign);
66#undef Vc_DEFINE_OPERATION
67#define Vc_DEFINE_OPERATION(name_, code_) \
68 struct name_ : public tag { \
69 template <typename V> Vc_INTRINSIC void operator()(V &v) { code_; } \
70 }
71Vc_DEFINE_OPERATION(increment, ++(v));
72Vc_DEFINE_OPERATION(decrement, --(v));
73Vc_DEFINE_OPERATION(random, v = V::Random());
74#undef Vc_DEFINE_OPERATION
75#define Vc_DEFINE_OPERATION_FORWARD(name_) \
76 struct Forward_##name_ : public tag \
77 { \
78 template <typename... Args, typename = decltype(name_(std::declval<Args>()...))> \
79 Vc_INTRINSIC void operator()(decltype(name_(std::declval<Args>()...)) &v, \
80 Args &&... args) \
81 { \
82 v = name_(std::forward<Args>(args)...); \
83 } \
84 template <typename... Args, typename = decltype(name_(std::declval<Args>()...))> \
85 Vc_INTRINSIC void operator()(std::nullptr_t, Args && ... args) \
86 { \
87 name_(std::forward<Args>(args)...); \
88 } \
89 }
90Vc_DEFINE_OPERATION_FORWARD(abs);
91Vc_DEFINE_OPERATION_FORWARD(asin);
92Vc_DEFINE_OPERATION_FORWARD(atan);
93Vc_DEFINE_OPERATION_FORWARD(atan2);
94Vc_DEFINE_OPERATION_FORWARD(cos);
95Vc_DEFINE_OPERATION_FORWARD(ceil);
96Vc_DEFINE_OPERATION_FORWARD(copysign);
97Vc_DEFINE_OPERATION_FORWARD(exp);
98Vc_DEFINE_OPERATION_FORWARD(exponent);
99Vc_DEFINE_OPERATION_FORWARD(fma);
100Vc_DEFINE_OPERATION_FORWARD(floor);
101Vc_DEFINE_OPERATION_FORWARD(frexp);
102Vc_DEFINE_OPERATION_FORWARD(isfinite);
103Vc_DEFINE_OPERATION_FORWARD(isinf);
104Vc_DEFINE_OPERATION_FORWARD(isnan);
105Vc_DEFINE_OPERATION_FORWARD(isnegative);
106Vc_DEFINE_OPERATION_FORWARD(ldexp);
107Vc_DEFINE_OPERATION_FORWARD(log);
108Vc_DEFINE_OPERATION_FORWARD(log10);
109Vc_DEFINE_OPERATION_FORWARD(log2);
110Vc_DEFINE_OPERATION_FORWARD(reciprocal);
111Vc_DEFINE_OPERATION_FORWARD(round);
112Vc_DEFINE_OPERATION_FORWARD(rsqrt);
113Vc_DEFINE_OPERATION_FORWARD(sin);
114Vc_DEFINE_OPERATION_FORWARD(sincos);
115Vc_DEFINE_OPERATION_FORWARD(sqrt);
116Vc_DEFINE_OPERATION_FORWARD(trunc);
117Vc_DEFINE_OPERATION_FORWARD(min);
118Vc_DEFINE_OPERATION_FORWARD(max);
119#undef Vc_DEFINE_OPERATION_FORWARD
120template<typename T> using is_operation = std::is_base_of<tag, T>;
121} // namespace Operations }}}
122
130template <typename T_, std::size_t Pieces_, std::size_t Index_> struct Segment/*{{{*/
131{
132 static_assert(Index_ < Pieces_, "You found a bug in Vc. Please report.");
133
134 using type = T_;
135 using type_decayed = typename std::decay<type>::type;
136 static constexpr std::size_t Pieces = Pieces_;
137 static constexpr std::size_t Index = Index_;
138 using fixed_size_type =
139 fixed_size_simd<conditional_t<Traits::is_simd_vector<type_decayed>::value,
140 typename type_decayed::EntryType, float>,
141 type_decayed::Size / Pieces>;
142
143 type data;
144
145 static constexpr std::size_t EntryOffset = Index * type_decayed::Size / Pieces;
146
147 // no non-const operator[] needed
148 decltype(std::declval<const type &>()[0]) operator[](size_t i) const { return data[i + EntryOffset]; }
149
150 fixed_size_type to_fixed_size() const
151 {
152 return simd_cast<fixed_size_type, Index>(data);
153 }
154};/*}}}*/
155
156//Segment<T *, ...> specialization {{{
// Partial specialization of Segment for pointer types: `data` is a T_* and
// to_fixed_size() returns a pointer instead of a value.
157template <typename T_, std::size_t Pieces_, std::size_t Index_>
158struct Segment<T_ *, Pieces_, Index_> {
159 static_assert(Index_ < Pieces_, "You found a bug in Vc. Please report.");
160
161 using type = T_ *;
 // Note: decays the pointee type T_, not the pointer type.
162 using type_decayed = typename std::decay<T_>::type;
163 static constexpr size_t Pieces = Pieces_;
164 static constexpr size_t Index = Index_;
 // Pointer to a fixed_size_simd with Size / Pieces entries. The entry type is the
 // pointee's VectorEntryType if the pointee is a simd vector and float otherwise.
 // NOTE(review): the primary template uses EntryType here, this one VectorEntryType -
 // confirm the difference is intended.
165 using fixed_size_type = fixed_size_simd<
166 typename std::conditional<Traits::is_simd_vector<type_decayed>::value,
167 typename type_decayed::VectorEntryType, float>::type,
168 type_decayed::Size / Pieces> *;
169
 // The wrapped pointer; the only non-static data member.
170 type data;
171
 // Index of the first entry of *data that belongs to this segment.
172 static constexpr std::size_t EntryOffset = Index * type_decayed::size() / Pieces;
173
 // Reinterprets `data` as a pointer to the pointee type of fixed_size_type and advances
 // it by Index objects of that type. Outside of GCC the pointee type is wrapped in
 // MayAlias.
174 fixed_size_type to_fixed_size() const
175 {
176 return reinterpret_cast<
177#ifdef Vc_GCC
178 // GCC might ICE if this type is declared with may_alias. If it doesn't
179 // ICE it warns about ignoring the attribute.
180 typename std::remove_pointer<fixed_size_type>::type
181#else
182 MayAlias<typename std::remove_pointer<fixed_size_type>::type>
183#endif
184 *>(data) +
185 Index;
186 }
187
 // The subscript operators are intentionally left disabled in this specialization.
188 //decltype(std::declval<type>()[0]) operator[](size_t i) { return data[i + EntryOffset]; }
189 //decltype(std::declval<type>()[0]) operator[](size_t i) const { return data[i + EntryOffset]; }
190};/*}}}*/
191
/// Empty tag type parameterized on a type \p T and a compile-time \p Offset.
/// NOTE(review): presumably transports an offset through the type system; the consumers
/// are not visible in this file - confirm.
template <typename T, std::size_t Offset>
struct AddOffset {
    constexpr AddOffset() = default;
};
205
206// class Split {{{1
// Helper with only static member functions that yields the "low" and "high" part of an
// argument. The private loImpl/hiImpl overloads implement the split per argument kind;
// the public lo()/hi() functions dispatch to them. `secondOffset` is the number of
// elements by which the high part of pointers and std::vector data is advanced.
215template <std::size_t secondOffset> class Split
216{
217 // split composite SimdArray
 // (N != M): the halves are returned via internal_data0 / internal_data1, either from
 // a const reference or, in the pointer overloads, as addresses.
218 template <typename U, std::size_t N, typename V, std::size_t M,
219 typename = enable_if<N != M>>
220 static Vc_INTRINSIC auto loImpl(const SimdArray<U, N, V, M> &x)
221 -> decltype(internal_data0(x))
222 {
223 return internal_data0(x);
224 }
225 template <typename U, std::size_t N, typename V, std::size_t M,
226 typename = enable_if<N != M>>
227 static Vc_INTRINSIC auto hiImpl(const SimdArray<U, N, V, M> &x)
228 -> decltype(internal_data1(x))
229 {
230 return internal_data1(x);
231 }
232 template <typename U, std::size_t N, typename V, std::size_t M,
233 typename = enable_if<N != M>>
234 static Vc_INTRINSIC auto loImpl(SimdArray<U, N, V, M> *x)
235 -> decltype(&internal_data0(*x))
236 {
237 return &internal_data0(*x);
238 }
239 template <typename U, std::size_t N, typename V, std::size_t M,
240 typename = enable_if<N != M>>
241 static Vc_INTRINSIC auto hiImpl(SimdArray<U, N, V, M> *x)
242 -> decltype(&internal_data1(*x))
243 {
244 return &internal_data1(*x);
245 }
246
247 // split atomic SimdArray
 // (SimdArray<U, N, V, N>): internal_data(x) is wrapped in a two-piece Segment with
 // index 0 (lo) or 1 (hi). The pointer overloads wrap the address instead.
248 template <typename U, std::size_t N, typename V>
249 static Vc_INTRINSIC Segment<V, 2, 0> loImpl(const SimdArray<U, N, V, N> &x)
250 {
251 return {internal_data(x)};
252 }
253 template <typename U, std::size_t N, typename V>
254 static Vc_INTRINSIC Segment<V, 2, 1> hiImpl(const SimdArray<U, N, V, N> &x)
255 {
256 return {internal_data(x)};
257 }
258 template <typename U, std::size_t N, typename V>
259 static Vc_INTRINSIC Segment<V *, 2, 0> loImpl(SimdArray<U, N, V, N> *x)
260 {
261 return {&internal_data(*x)};
262 }
263 template <typename U, std::size_t N, typename V>
264 static Vc_INTRINSIC Segment<V *, 2, 1> hiImpl(SimdArray<U, N, V, N> *x)
265 {
266 return {&internal_data(*x)};
267 }
268
269 // split composite SimdMaskArray
 // Same scheme as for SimdArray: internal_data0 / internal_data1 for the general case,
 // a two-piece Segment of the mask_type for SimdMaskArray<U, N, V, N>.
270 template <typename U, std::size_t N, typename V, std::size_t M>
271 static Vc_INTRINSIC auto loImpl(const SimdMaskArray<U, N, V, M> &x) -> decltype(internal_data0(x))
272 {
273 return internal_data0(x);
274 }
275 template <typename U, std::size_t N, typename V, std::size_t M>
276 static Vc_INTRINSIC auto hiImpl(const SimdMaskArray<U, N, V, M> &x) -> decltype(internal_data1(x))
277 {
278 return internal_data1(x);
279 }
280
281 template <typename U, std::size_t N, typename V>
282 static Vc_INTRINSIC Segment<typename SimdMaskArray<U, N, V, N>::mask_type, 2, 0> loImpl(
283 const SimdMaskArray<U, N, V, N> &x)
284 {
285 return {internal_data(x)};
286 }
287 template <typename U, std::size_t N, typename V>
288 static Vc_INTRINSIC Segment<typename SimdMaskArray<U, N, V, N>::mask_type, 2, 1> hiImpl(
289 const SimdMaskArray<U, N, V, N> &x)
290 {
291 return {internal_data(x)};
292 }
293
294 // split Vector<T> and Mask<T>
295#ifdef Vc_IMPL_AVX
 // Rvalue AVX vectors and masks are converted with simd_cast to the corresponding SSE
 // type, using offset 0 for lo and 1 for hi.
296 template <class T>
297 static Vc_INTRINSIC SSE::Vector<T> loImpl(Vector<T, VectorAbi::Avx> &&x)
298 {
299 return simd_cast<SSE::Vector<T>, 0>(x);
300 }
301 template <class T>
302 static Vc_INTRINSIC SSE::Vector<T> hiImpl(Vector<T, VectorAbi::Avx> &&x)
303 {
304 return simd_cast<SSE::Vector<T>, 1>(x);
305 }
306 template <class T>
307 static Vc_INTRINSIC SSE::Mask<T> loImpl(Mask<T, VectorAbi::Avx> &&x)
308 {
309 return simd_cast<SSE::Mask<T>, 0>(x);
310 }
311 template <class T>
312 static Vc_INTRINSIC SSE::Mask<T> hiImpl(Mask<T, VectorAbi::Avx> &&x)
313 {
314 return simd_cast<SSE::Mask<T>, 1>(x);
315 }
316#endif // Vc_IMPL_AVX
 // True if T is a simd vector that is not a SimdArray, or a simd mask that is not a
 // SimdMaskArray.
317 template <typename T>
318 static constexpr bool is_vector_or_mask(){
319 return (Traits::is_simd_vector<T>::value && !Traits::isSimdArray<T>::value) ||
320 (Traits::is_simd_mask<T>::value && !Traits::isSimdMaskArray<T>::value);
321 }
 // Generic vectors/masks: the forwarded argument is wrapped in a two-piece Segment.
 // The defaulted second parameter only serves to disable the overload for other types.
322 template <typename V>
323 static Vc_INTRINSIC Segment<V, 2, 0> loImpl(V &&x, enable_if<is_vector_or_mask<V>()> = nullarg)
324 {
325 return {std::forward<V>(x)};
326 }
327 template <typename V>
328 static Vc_INTRINSIC Segment<V, 2, 1> hiImpl(V &&x, enable_if<is_vector_or_mask<V>()> = nullarg)
329 {
330 return {std::forward<V>(x)};
331 }
332
333 // split std::vector<T>
 // lo is the start of the vector's storage, hi is advanced by secondOffset elements.
 // No bounds check is performed here.
334 template <class T, class A>
335 static Vc_INTRINSIC const T *loImpl(const std::vector<T, A> &x)
336 {
337 return x.data();
338 }
339 template <class T, class A>
340 static Vc_INTRINSIC const T *hiImpl(const std::vector<T, A> &x)
341 {
342 return x.data() + secondOffset;
343 }
344
345 // generically split Segments
 // A Segment is split by doubling the number of pieces; the new index is 2 * Index for
 // lo and 2 * Index + 1 for hi. The wrapped data is passed on unchanged.
346 template <typename V, std::size_t Pieces, std::size_t Index>
347 static Vc_INTRINSIC Segment<V, 2 * Pieces, 2 * Index> loImpl(
348 const Segment<V, Pieces, Index> &x)
349 {
350 return {x.data};
351 }
352 template <typename V, std::size_t Pieces, std::size_t Index>
353 static Vc_INTRINSIC Segment<V, 2 * Pieces, 2 * Index + 1> hiImpl(
354 const Segment<V, Pieces, Index> &x)
355 {
356 return {x.data};
357 }
358
 // Detection of a usable loImpl overload for T: the `int` overload only exists if
 // loImpl(std::declval<T>()) is well-formed and is the better match for the argument 1;
 // otherwise the `float` overload is selected and the result is false.
363 template <typename T, typename = decltype(loImpl(std::declval<T>()))>
364 static std::true_type have_lo_impl(int);
365 template <typename T> static std::false_type have_lo_impl(float);
366 template <typename T> static constexpr bool have_lo_impl()
367 {
368 return decltype(have_lo_impl<T>(1))::value;
369 }
370
 // Same detection for hiImpl.
371 template <typename T, typename = decltype(hiImpl(std::declval<T>()))>
372 static std::true_type have_hi_impl(int);
373 template <typename T> static std::false_type have_hi_impl(float);
374 template <typename T> static constexpr bool have_hi_impl()
375 {
376 return decltype(have_hi_impl<T>(1))::value;
377 }
379
380public:
 // Overloads tagged with Operations::gather: pointer arguments are returned unchanged
 // (lo) or advanced by secondOffset (hi); non-pointer arguments are forwarded to
 // loImpl / hiImpl.
388 template <typename U>
389 static Vc_INTRINSIC const U *lo(Operations::gather, const U *ptr)
390 {
391 return ptr;
392 }
393 template <typename U>
394 static Vc_INTRINSIC const U *hi(Operations::gather, const U *ptr)
395 {
396 return ptr + secondOffset;
397 }
398 template <typename U, typename = enable_if<!std::is_pointer<U>::value>>
399 static Vc_ALWAYS_INLINE decltype(loImpl(std::declval<U>()))
400 lo(Operations::gather, U &&x)
401 {
402 return loImpl(std::forward<U>(x));
403 }
404 template <typename U, typename = enable_if<!std::is_pointer<U>::value>>
405 static Vc_ALWAYS_INLINE decltype(hiImpl(std::declval<U>()))
406 hi(Operations::gather, U &&x)
407 {
408 return hiImpl(std::forward<U>(x));
409 }
 // Overloads tagged with Operations::scatter: same pointer handling as for gather.
410 template <typename U>
411 static Vc_INTRINSIC const U *lo(Operations::scatter, const U *ptr)
412 {
413 return ptr;
414 }
415 template <typename U>
416 static Vc_INTRINSIC const U *hi(Operations::scatter, const U *ptr)
417 {
418 return ptr + secondOffset;
419 }
421
 // Untagged lo()/hi(): use loImpl / hiImpl if an overload accepts the argument ...
433 template <typename U>
434 static Vc_ALWAYS_INLINE decltype(loImpl(std::declval<U>())) lo(U &&x)
435 {
436 return loImpl(std::forward<U>(x));
437 }
438 template <typename U>
439 static Vc_ALWAYS_INLINE decltype(hiImpl(std::declval<U>())) hi(U &&x)
440 {
441 return hiImpl(std::forward<U>(x));
442 }
443
 // ... and otherwise pass the argument through unchanged, so that arguments without a
 // split implementation reach both halves as they are.
444 template <typename U>
445 static Vc_ALWAYS_INLINE enable_if<!have_lo_impl<U>(), U> lo(U &&x)
446 {
447 return std::forward<U>(x);
448 }
449 template <typename U>
450 static Vc_ALWAYS_INLINE enable_if<!have_hi_impl<U>(), U> hi(U &&x)
451 {
452 return std::forward<U>(x);
453 }
455};
456
457// actual_value {{{1
458template <typename Op, typename U, std::size_t M, typename V>
459static Vc_INTRINSIC const V &actual_value(Op, const SimdArray<U, M, V, M> &x)
460{
461 return internal_data(x);
462}
463template <typename Op, typename U, std::size_t M, typename V>
464static Vc_INTRINSIC V *actual_value(Op, SimdArray<U, M, V, M> *x)
465{
466 return &internal_data(*x);
467}
468template <typename Op, typename T, size_t Pieces, size_t Index>
469static Vc_INTRINSIC typename Segment<T, Pieces, Index>::fixed_size_type actual_value(
470 Op, Segment<T, Pieces, Index> &&seg)
471{
472 return seg.to_fixed_size();
473}
474
475template <typename Op, typename U, std::size_t M, typename V>
476static Vc_INTRINSIC const typename V::Mask &actual_value(Op, const SimdMaskArray<U, M, V, M> &x)
477{
478 return internal_data(x);
479}
480template <typename Op, typename U, std::size_t M, typename V>
481static Vc_INTRINSIC typename V::Mask *actual_value(Op, SimdMaskArray<U, M, V, M> *x)
482{
483 return &internal_data(*x);
484}
485
486// unpackArgumentsAuto {{{1
502
503
505template <typename Op, typename Arg>
506Vc_INTRINSIC decltype(actual_value(std::declval<Op &>(), std::declval<Arg>()))
507conditionalUnpack(std::true_type, Op op, Arg &&arg)
508{
509 return actual_value(op, std::forward<Arg>(arg));
510}
512template <typename Op, typename Arg>
513Vc_INTRINSIC Arg conditionalUnpack(std::false_type, Op, Arg &&arg)
514{
515 return std::forward<Arg>(arg);
516}
517
/// Boolean integral_constant that is true if bit number \p B of \p A is not set.
/// An object of this type is passed as first argument to conditionalUnpack, where a true
/// value selects the overload that calls actual_value.
template <size_t A, size_t B>
struct selectorType : public std::integral_constant<bool, (A & (size_t(1) << B)) == 0> {
};
522
524template <size_t I, typename Op, typename R, typename... Args, size_t... Indexes>
525Vc_INTRINSIC decltype(std::declval<Op &>()(std::declval<R &>(),
526 conditionalUnpack(selectorType<I, Indexes>(),
527 std::declval<Op &>(),
528 std::declval<Args>())...))
529unpackArgumentsAutoImpl(int, index_sequence<Indexes...>, Op op, R &&r, Args &&... args)
530{
531 op(std::forward<R>(r),
532 conditionalUnpack(selectorType<I, Indexes>(), op, std::forward<Args>(args))...);
533}
534
536template <size_t I, typename Op, typename R, typename... Args, size_t... Indexes>
537Vc_INTRINSIC enable_if<(I <= (size_t(1) << sizeof...(Args))), void> unpackArgumentsAutoImpl(
538 float, index_sequence<Indexes...> is, Op op, R &&r, Args &&... args)
539{
540 // if R is nullptr_t then the return type cannot enforce that actually any unwrapping
541 // of the SimdArray types happens. Thus, you could get an endless loop of the
542 // SimdArray function overload calling itself, if the index goes up to (1 <<
543 // sizeof...(Args)) - 1 (which means no argument transformations via actual_value).
544 static_assert(
545 I < (1 << sizeof...(Args)) - (std::is_same<R, std::nullptr_t>::value ? 1 : 0),
546 "Vc or compiler bug. Please report. Failed to find a combination of "
547 "actual_value(arg) transformations that allows calling Op.");
548 unpackArgumentsAutoImpl<I + 1, Op, R, Args...>(int(), is, op, std::forward<R>(r),
549 std::forward<Args>(args)...);
550}
551
552#ifdef Vc_ICC
/// Primary template: `type` is void unless the first template argument is 2.
template <size_t, typename... Ts> struct IccWorkaround {
    using type = void;
};
/// Specialization for a first template argument of 2: `type` is the second type of the
/// pack after std::decay and std::remove_pointer. Requires at least two types in the pack.
template <typename... Ts> struct IccWorkaround<2, Ts...> {
private:
    using second_type = typename std::tuple_element<1, std::tuple<Ts...>>::type;
    using decayed_type = typename std::decay<second_type>::type;

public:
    using type = typename std::remove_pointer<decayed_type>::type;
};
560#endif
561
563template <typename Op, typename R, typename... Args>
564Vc_INTRINSIC void unpackArgumentsAuto(Op op, R &&r, Args &&... args)
565{
566#ifdef Vc_ICC
567 // ugly hacky workaround for ICC:
568 // The compiler fails to do SFINAE right on recursion. We have to hit the right
569 // recursionStart number from the start.
570 const int recursionStart =
571 Traits::isSimdArray<
572 typename IccWorkaround<sizeof...(Args), Args...>::type>::value &&
573 (std::is_same<Op, Common::Operations::Forward_frexp>::value ||
574 std::is_same<Op, Common::Operations::Forward_ldexp>::value)
575 ? 2
576 : 0;
577#else
578 const int recursionStart = 0;
579#endif
580 unpackArgumentsAutoImpl<recursionStart>(
581 int(), make_index_sequence<sizeof...(Args)>(), op, std::forward<R>(r),
582 std::forward<Args>(args)...);
583}
585
586//}}}1
588} // namespace Common
589} // namespace Vc
590
591#endif // VC_COMMON_SIMDARRAYHELPER_H_
592
593// vim: foldmethod=marker
#define Vc_GCC
This macro is defined to a number identifying the GCC version if the current translation unit is compiled with GCC.
Definition global.h:75