Vc 1.4.5
SIMD Vector Classes for C++
 
Loading...
Searching...
No Matches
logarithm.h
1/* This file is part of the Vc library. {{{
2Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
3
4Redistribution and use in source and binary forms, with or without
5modification, are permitted provided that the following conditions are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 * Neither the names of contributing organizations nor the
12 names of its contributors may be used to endorse or promote products
13 derived from this software without specific prior written permission.
14
15THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
26}}}*/
27
28/* The log implementations are based on code from Julien Pommier which carries the following
29 copyright information:
30 */
31/*
32 Inspired by Intel Approximate Math library, and based on the
33 corresponding algorithms of the cephes math library
34*/
35/* Copyright (C) 2007 Julien Pommier
36
37 This software is provided 'as-is', without any express or implied
38 warranty. In no event will the authors be held liable for any damages
39 arising from the use of this software.
40
41 Permission is granted to anyone to use this software for any purpose,
42 including commercial applications, and to alter it and redistribute it
43 freely, subject to the following restrictions:
44
45 1. The origin of this software must not be misrepresented; you must not
46 claim that you wrote the original software. If you use this software
47 in a product, an acknowledgment in the product documentation would be
48 appreciated but is not required.
49 2. Altered source versions must be plainly marked as such, and must not be
50 misrepresented as being the original software.
51 3. This notice may not be removed or altered from any source distribution.
52
53 (this is the zlib license)
54*/
55
56#ifdef Vc_COMMON_MATH_H_INTERNAL
57
58enum LogarithmBase {
59 BaseE, Base10, Base2
60};
61
62namespace Detail
63{
64template <typename T, typename Abi>
65using Const = typename std::conditional<std::is_same<Abi, VectorAbi::Avx>::value,
66 AVX::Const<T>, SSE::Const<T>>::type;
67
68template<LogarithmBase Base>
69struct LogImpl
70{
71 template<typename T, typename Abi> static Vc_ALWAYS_INLINE void log_series(Vector<T, Abi> &Vc_RESTRICT x, typename Vector<T, Abi>::AsArg exponent) {
72 typedef Vector<T, Abi> V;
73 typedef Detail::Const<T, Abi> C;
74 // Taylor series around x = 2^exponent
75 // f(x) = ln(x) → exponent * ln(2) → C::ln2_small + C::ln2_large
76 // f'(x) = x⁻¹ → x → 1
77 // f''(x) = - x⁻² → -x² / 2 → C::_1_2()
78 // = 2!x⁻³ → x³ / 3 → C::P(8)
79 // = -3!x⁻⁴ → -x⁴ / 4 → C::P(7)
80 // = 4!x⁻⁵ → x⁵ / 5 → C::P(6)
81 // ...
82 // The high order coefficients are adjusted to reduce the error that occurs from ommission
83 // of higher order terms.
84 // P(0) is the smallest term and |x| < 1 ⇒ |xⁿ| > |xⁿ⁺¹|
85 // The order of additions must go from smallest to largest terms
86 const V x2 = x * x; // 0 → 4
87#ifdef Vc_LOG_ILP
88 V y2 = (C::P(6) * /*4 → 8*/ x2 + /* 8 → 11*/ C::P(7) * /*1 → 5*/ x) + /*11 → 14*/ C::P(8);
89 V y0 = (C::P(0) * /*5 → 9*/ x2 + /* 9 → 12*/ C::P(1) * /*2 → 6*/ x) + /*12 → 15*/ C::P(2);
90 V y1 = (C::P(3) * /*6 → 10*/ x2 + /*10 → 13*/ C::P(4) * /*3 → 7*/ x) + /*13 → 16*/ C::P(5);
91 const V x3 = x2 * x; // 7 → 11
92 const V x6 = x3 * x3; // 11 → 15
93 const V x9 = x6 * x3; // 15 → 19
94 V y = (y0 * /*19 → 23*/ x9 + /*23 → 26*/ y1 * /*16 → 20*/ x6) + /*26 → 29*/ y2 * /*14 → 18*/ x3;
95#elif defined Vc_LOG_ILP2
96 /*
97 * name start done
98 * movaps %xmm0, %xmm1 ; x 0 1
99 * movaps %xmm0, %xmm2 ; x 0 1
100 * mulps %xmm1, %xmm1 ; x2 1 5 *xmm1
101 * movaps <P8>, %xmm15 ; y8 1 2
102 * mulps %xmm1, %xmm2 ; x3 5 9 *xmm2
103 * movaps %xmm1, %xmm3 ; x2 5 6
104 * movaps %xmm1, %xmm4 ; x2 5 6
105 * mulps %xmm3, %xmm3 ; x4 6 10 *xmm3
106 * movaps %xmm2, %xmm5 ; x3 9 10
107 * movaps %xmm2, %xmm6 ; x3 9 10
108 * mulps %xmm2, %xmm4 ; x5 9 13 *xmm4
109 * movaps %xmm3, %xmm7 ; x4 10 11
110 * movaps %xmm3, %xmm8 ; x4 10 11
111 * movaps %xmm3, %xmm9 ; x4 10 11
112 * mulps %xmm5, %xmm5 ; x6 10 14 *xmm5
113 * mulps %xmm3, %xmm6 ; x7 11 15 *xmm6
114 * mulps %xmm7, %xmm7 ; x8 12 16 *xmm7
115 * movaps %xmm4, %xmm10 ; x5 13 14
116 * mulps %xmm4, %xmm8 ; x9 13 17 *xmm8
117 * mulps %xmm5, %xmm10 ; x11 14 18 *xmm10
118 * mulps %xmm5, %xmm9 ; x10 15 19 *xmm9
119 * mulps <P0>, %xmm10 ; y0 18 22
120 * mulps <P1>, %xmm9 ; y1 19 23
121 * mulps <P2>, %xmm8 ; y2 20 24
122 * mulps <P3>, %xmm7 ; y3 21 25
123 * addps %xmm10, %xmm9 ; y 23 26
124 * addps %xmm9, %xmm8 ; y 26 29
125 * addps %xmm8, %xmm7 ; y 29 32
126 */
127 const V x3 = x2 * x; // 4 → 8
128 const V x4 = x2 * x2; // 5 → 9
129 const V x5 = x2 * x3; // 8 → 12
130 const V x6 = x3 * x3; // 9 → 13
131 const V x7 = x4 * x3; //
132 const V x8 = x4 * x4;
133 const V x9 = x5 * x4;
134 const V x10 = x5 * x5;
135 const V x11 = x5 * x6; // 13 → 17
136 V y = C::P(0) * x11 + C::P(1) * x10 + C::P(2) * x9 + C::P(3) * x8 + C::P(4) * x7
137 + C::P(5) * x6 + C::P(6) * x5 + C::P(7) * x4 + C::P(8) * x3;
138#else
139 V y = C::P(0);
140 Vc::Common::unrolled_loop<int, 1, 9>([&](int i) { y = y * x + C::P(i); });
141 y *= x * x2;
142#endif
143 switch (Base) {
144 case BaseE:
145 // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2))
146 y += exponent * C::ln2_small();
147 y -= x2 * C::_1_2(); // [0, 0.25[
148 x += y;
149 x += exponent * C::ln2_large();
150 break;
151 case Base10:
152 y += exponent * C::ln2_small();
153 y -= x2 * C::_1_2(); // [0, 0.25[
154 x += y;
155 x += exponent * C::ln2_large();
156 x *= C::log10_e();
157 break;
158 case Base2:
159 {
160 const V x_ = x;
161 x *= C::log2_e();
162 y *= C::log2_e();
163 y -= x_ * x * C::_1_2(); // [0, 0.25[
164 x += y;
165 x += exponent;
166 break;
167 }
168 }
169 }
170
171template <typename Abi>
172static Vc_ALWAYS_INLINE void log_series(Vector<double, Abi> &Vc_RESTRICT x,
173 typename Vector<double, Abi>::AsArg exponent)
174{
175 typedef Vector<double, Abi> V;
176 typedef Detail::Const<double, Abi> C;
177 const V x2 = x * x;
178 V y = C::P(0);
179 V y2 = C::Q(0) + x;
180 Vc::Common::unrolled_loop<int, 1, 5>([&](int i) {
181 y = y * x + C::P(i);
182 y2 = y2 * x + C::Q(i);
183 });
184 y2 = x / y2;
185 y = y * x + C::P(5);
186 y = x2 * y * y2;
187 // TODO: refactor the following with the float implementation:
188 switch (Base) {
189 case BaseE:
190 // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2))
191 y += exponent * C::ln2_small();
192 y -= x2 * C::_1_2(); // [0, 0.25[
193 x += y;
194 x += exponent * C::ln2_large();
195 break;
196 case Base10:
197 y += exponent * C::ln2_small();
198 y -= x2 * C::_1_2(); // [0, 0.25[
199 x += y;
200 x += exponent * C::ln2_large();
201 x *= C::log10_e();
202 break;
203 case Base2:
204 {
205 const V x_ = x;
206 x *= C::log2_e();
207 y *= C::log2_e();
208 y -= x_ * x * C::_1_2(); // [0, 0.25[
209 x += y;
210 x += exponent;
211 break;
212 }
213 }
214 }
215
216template <typename T, typename Abi, typename V = Vector<T, Abi>>
217static inline Vector<T, Abi> calc(V _x)
218{
219 typedef typename V::Mask M;
220 typedef Detail::Const<T, Abi> C;
221
222 V x(_x);
223
224 const M invalidMask = x < V::Zero();
225 const M infinityMask = x == V::Zero();
226 const M denormal = x <= C::min();
227
228 x(denormal) *= V(Vc::Detail::doubleConstant<1, 0, 54>()); // 2²⁵
229 V exponent = Detail::exponent(x.data()); // = ⎣log₂(x)⎦
230 exponent(denormal) -= 54;
231
232 x.setZero(C::exponentMask()); // keep only the fractional part ⇒ x ∈ [1, 2[
233 x = Detail::operator|(x,
234 C::_1_2()); // and set the exponent to 2⁻¹ ⇒ x ∈ [½, 1[
235
236 // split calculation in two cases:
237 // A: x ∈ [½, √½[
238 // B: x ∈ [√½, 1[
239 // √½ defines the point where Δe(x) := log₂(x) - ⎣log₂(x)⎦ = ½, i.e.
240 // log₂(√½) - ⎣log₂(√½)⎦ = ½ * -1 - ⎣½ * -1⎦ = -½ + 1 = ½
241
242 const M smallX = x < C::_1_sqrt2();
243 x(smallX) += x; // => x ∈ [√½, 1[ ∪ [1.5, 1 + √½[
244 x -= V::One(); // => x ∈ [√½ - 1, 0[ ∪ [0.5, √½[
245 exponent(!smallX) += V::One();
246
247 log_series(x, exponent); // A: (ˣ⁄₂ᵉ - 1, e) B: (ˣ⁄₂ᵉ⁺¹ - 1, e + 1)
248
249 x.setQnan(invalidMask); // x < 0 → NaN
250 x(infinityMask) = C::neginf(); // x = 0 → -∞
251
252 return x;
253 }
254};
255} // namespace Detail
256
257template <typename T, typename Abi>
258Vc_INTRINSIC Vc_CONST Vector<T, detail::not_fixed_size_abi<Abi>> log(
259 const Vector<T, Abi> &x)
260{
261 return Detail::LogImpl<BaseE>::calc<T, Abi>(x);
262}
263template <typename T, typename Abi>
264Vc_INTRINSIC Vc_CONST Vector<T, detail::not_fixed_size_abi<Abi>> log10(
265 const Vector<T, Abi> &x)
266{
267 return Detail::LogImpl<Base10>::calc<T, Abi>(x);
268}
269template <typename T, typename Abi>
270Vc_INTRINSIC Vc_CONST Vector<T, detail::not_fixed_size_abi<Abi>> log2(
271 const Vector<T, Abi> &x)
272{
273 return Detail::LogImpl<Base2>::calc<T, Abi>(x);
274}
275
276#endif // Vc_COMMON_MATH_H_INTERNAL
fixed_size_simd< T, N > exponent(const SimdArray< T, N, V, M > &x)
Applies the std::exponent function component-wise and concurrently.
Definition simdarray.h:1814