Vc 1.4.5
SIMD Vector Classes for C++
 
Loading...
Searching...
No Matches
gatherimplementation.h
1/* This file is part of the Vc library. {{{
2Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>
3
4Redistribution and use in source and binary forms, with or without
5modification, are permitted provided that the following conditions are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 * Neither the names of contributing organizations nor the
12 names of its contributors may be used to endorse or promote products
13 derived from this software without specific prior written permission.
14
15THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
26}}}*/
27
28#ifndef VC_COMMON_GATHERIMPLEMENTATION_H_
29#define VC_COMMON_GATHERIMPLEMENTATION_H_
30
31#include "macros.h"
32
33namespace Vc_VERSIONED_NAMESPACE
34{
35namespace Common
36{
37
/**
 * Selects one of the strategies for a masked gather that are implemented by the
 * executeGather overloads in this header.
 *
 * The underlying type is int and the enumerators are numbered consecutively from 0.
 */
enum class GatherScatterImplementation : int {
    /// Visit every lane and copy the ones whose mask entry is set.
    SimpleLoop = 0,
    /// Adjust the indexes via setZeroInverted, do one full gather, then blend by mask.
    SetIndexZero = 1,
    /// Iterate only over the set bits of the mask using bit-scan operations.
    BitScanLoop = 2,
    /// Switch on the population count of the mask and fall through the cases.
    PopcntSwitch = 3
};
44
45using SimpleLoopT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::SimpleLoop>;
46using SetIndexZeroT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::SetIndexZero>;
47using BitScanLoopT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::BitScanLoop>;
48using PopcntSwitchT = std::integral_constant<GatherScatterImplementation, GatherScatterImplementation::PopcntSwitch>;
49
50template <typename V, typename MT, typename IT>
51Vc_ALWAYS_INLINE void executeGather(SetIndexZeroT,
52 V &v,
53 const MT *mem,
54 IT &&indexes_,
55 typename V::MaskArgument mask)
56{
57 auto indexes = std::forward<IT>(indexes_);
58 indexes.setZeroInverted(static_cast<decltype(!indexes)>(mask));
59 const V tmp(mem, indexes);
60 where(mask) | v = tmp;
61}
62
63template <typename V, typename MT, typename IT>
64Vc_ALWAYS_INLINE void executeGather(SimpleLoopT, V &v, const MT *mem, const IT &indexes,
65 const typename V::MaskArgument mask)
66{
67 if (Vc_IS_UNLIKELY(mask.isEmpty())) {
68 return;
69 }
70#if defined Vc_GCC && Vc_GCC >= 0x40900
71 // GCC 4.8 doesn't support dependent type and constexpr vector_size argument
72 constexpr std::size_t Sizeof = sizeof(V);
73 using Builtin [[gnu::vector_size(Sizeof)]] = typename V::value_type;
74 Builtin tmp = reinterpret_cast<Builtin>(v.data());
75 Common::unrolled_loop<std::size_t, 0, V::Size>([&](std::size_t i) {
76 if (mask[i]) {
77 tmp[i] = mem[indexes[i]];
78 }
79 });
80 v.data() = reinterpret_cast<typename V::VectorType>(tmp);
81#else
82 Common::unrolled_loop<std::size_t, 0, V::Size>([&](std::size_t i) {
83 if (mask[i])
84 v[i] = mem[indexes[i]];
85 });
86#endif
87}
88
89template <typename V, typename MT, typename IT>
90Vc_ALWAYS_INLINE void executeGather(BitScanLoopT,
91 V &v,
92 const MT *mem,
93 const IT &indexes,
94 typename V::MaskArgument mask)
95{
96#ifdef Vc_GNU_ASM
97 size_t bits = mask.toInt();
98 while (Vc_IS_LIKELY(bits > 0)) {
99 size_t i, j;
100 asm("bsf %[bits],%[i]\n\t"
101 "bsr %[bits],%[j]\n\t"
102 "btr %[i],%[bits]\n\t"
103 "btr %[j],%[bits]\n\t"
104 : [i] "=r"(i), [j] "=r"(j), [bits] "+r"(bits));
105 v[i] = mem[indexes[i]];
106 v[j] = mem[indexes[j]];
107 }
108#else
109 // Alternative from Vc::SSE (0.7)
110 int bits = mask.toInt();
111 while (bits) {
112 const int i = _bit_scan_forward(bits);
113 bits &= bits - 1;
114 v[i] = mem[indexes[i]];
115 }
116#endif // Vc_GNU_ASM
117}
118
119template <typename V, typename MT, typename IT>
120Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
121 V &v,
122 const MT *mem,
123 const IT &indexes,
124 typename V::MaskArgument mask,
125 enable_if<V::Size == 16> = nullarg)
126{
127 unsigned int bits = mask.toInt();
128 unsigned int low, high = 0;
129 switch (Vc::Detail::popcnt16(bits)) {
130 case 16:
131 v.gather(mem, indexes);
132 break;
133 case 15:
134 low = _bit_scan_forward(bits);
135 bits ^= 1 << low;
136 v[low] = mem[indexes[low]];
137 // fallthrough
138 case 14:
139 high = _bit_scan_reverse(bits);
140 v[high] = mem[indexes[high]];
141 high = (1 << high);
142 // fallthrough
143 case 13:
144 low = _bit_scan_forward(bits);
145 bits ^= high | (1 << low);
146 v[low] = mem[indexes[low]];
147 // fallthrough
148 case 12:
149 high = _bit_scan_reverse(bits);
150 v[high] = mem[indexes[high]];
151 high = (1 << high);
152 // fallthrough
153 case 11:
154 low = _bit_scan_forward(bits);
155 bits ^= high | (1 << low);
156 v[low] = mem[indexes[low]];
157 // fallthrough
158 case 10:
159 high = _bit_scan_reverse(bits);
160 v[high] = mem[indexes[high]];
161 high = (1 << high);
162 // fallthrough
163 case 9:
164 low = _bit_scan_forward(bits);
165 bits ^= high | (1 << low);
166 v[low] = mem[indexes[low]];
167 // fallthrough
168 case 8:
169 high = _bit_scan_reverse(bits);
170 v[high] = mem[indexes[high]];
171 high = (1 << high);
172 // fallthrough
173 case 7:
174 low = _bit_scan_forward(bits);
175 bits ^= high | (1 << low);
176 v[low] = mem[indexes[low]];
177 // fallthrough
178 case 6:
179 high = _bit_scan_reverse(bits);
180 v[high] = mem[indexes[high]];
181 high = (1 << high);
182 // fallthrough
183 case 5:
184 low = _bit_scan_forward(bits);
185 bits ^= high | (1 << low);
186 v[low] = mem[indexes[low]];
187 // fallthrough
188 case 4:
189 high = _bit_scan_reverse(bits);
190 v[high] = mem[indexes[high]];
191 high = (1 << high);
192 // fallthrough
193 case 3:
194 low = _bit_scan_forward(bits);
195 bits ^= high | (1 << low);
196 v[low] = mem[indexes[low]];
197 // fallthrough
198 case 2:
199 high = _bit_scan_reverse(bits);
200 v[high] = mem[indexes[high]];
201 // fallthrough
202 case 1:
203 low = _bit_scan_forward(bits);
204 v[low] = mem[indexes[low]];
205 // fallthrough
206 case 0:
207 break;
208 }
209}
210template <typename V, typename MT, typename IT>
211Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
212 V &v,
213 const MT *mem,
214 const IT &indexes,
215 typename V::MaskArgument mask,
216 enable_if<V::Size == 8> = nullarg)
217{
218 unsigned int bits = mask.toInt();
219 unsigned int low, high = 0;
220 switch (Vc::Detail::popcnt8(bits)) {
221 case 8:
222 v.gather(mem, indexes);
223 break;
224 case 7:
225 low = _bit_scan_forward(bits);
226 bits ^= 1 << low;
227 v[low] = mem[indexes[low]];
228 // fallthrough
229 case 6:
230 high = _bit_scan_reverse(bits);
231 v[high] = mem[indexes[high]];
232 high = (1 << high);
233 // fallthrough
234 case 5:
235 low = _bit_scan_forward(bits);
236 bits ^= high | (1 << low);
237 v[low] = mem[indexes[low]];
238 // fallthrough
239 case 4:
240 high = _bit_scan_reverse(bits);
241 v[high] = mem[indexes[high]];
242 high = (1 << high);
243 // fallthrough
244 case 3:
245 low = _bit_scan_forward(bits);
246 bits ^= high | (1 << low);
247 v[low] = mem[indexes[low]];
248 // fallthrough
249 case 2:
250 high = _bit_scan_reverse(bits);
251 v[high] = mem[indexes[high]];
252 // fallthrough
253 case 1:
254 low = _bit_scan_forward(bits);
255 v[low] = mem[indexes[low]];
256 // fallthrough
257 case 0:
258 break;
259 }
260}
261template <typename V, typename MT, typename IT>
262Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
263 V &v,
264 const MT *mem,
265 const IT &indexes,
266 typename V::MaskArgument mask,
267 enable_if<V::Size == 4> = nullarg)
268{
269 unsigned int bits = mask.toInt();
270 unsigned int low, high = 0;
271 switch (Vc::Detail::popcnt4(bits)) {
272 case 4:
273 v.gather(mem, indexes);
274 break;
275 case 3:
276 low = _bit_scan_forward(bits);
277 bits ^= 1 << low;
278 v[low] = mem[indexes[low]];
279 // fallthrough
280 case 2:
281 high = _bit_scan_reverse(bits);
282 v[high] = mem[indexes[high]];
283 // fallthrough
284 case 1:
285 low = _bit_scan_forward(bits);
286 v[low] = mem[indexes[low]];
287 // fallthrough
288 case 0:
289 break;
290 }
291}
292template <typename V, typename MT, typename IT>
293Vc_ALWAYS_INLINE void executeGather(PopcntSwitchT,
294 V &v,
295 const MT *mem,
296 const IT &indexes,
297 typename V::MaskArgument mask,
298 enable_if<V::Size == 2> = nullarg)
299{
300 unsigned int bits = mask.toInt();
301 unsigned int low;
302 switch (Vc::Detail::popcnt4(bits)) {
303 case 2:
304 v.gather(mem, indexes);
305 break;
306 case 1:
307 low = _bit_scan_forward(bits);
308 v[low] = mem[indexes[low]];
309 // fallthrough
310 case 0:
311 break;
312 }
313}
314
315} // namespace Common
316} // namespace Vc
317
318#endif // VC_COMMON_GATHERIMPLEMENTATION_H_
constexpr WhereImpl::WhereMask< M > where(const M &mask)
Conditional assignment.
Definition where.h:265