Vc  1.4.3
SIMD Vector Classes for C++
memory.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 #ifndef VC_COMMON_MEMORY_H_
29 #define VC_COMMON_MEMORY_H_
30 
31 #include "memorybase.h"
32 #include <assert.h>
33 #include <algorithm>
34 #include <cstring>
35 #include <cstddef>
36 #include <initializer_list>
37 #include "memoryfwd.h"
38 #include "malloc.h"
39 #include "macros.h"
40 
41 namespace Vc_VERSIONED_NAMESPACE
42 {
43 namespace Common
44 {
/**
 * \internal
 * Compile-time helper that rounds an entry count up to a whole number of vectors.
 *
 * \tparam V    Vector type; only \c V::Size (entries per vector) is used.
 * \tparam Size Number of scalar entries that have to fit.
 *
 * The mask arithmetic below relies on \c V::Size being a power of two.
 */
template <typename V, size_t Size> struct _MemorySizeCalculation
{
    enum AlignmentCalculations {
        /// Granularity (in entries) the size gets rounded up to.
        Alignment = V::Size,
        /// Bit mask selecting the remainder of a division by Alignment.
        AlignmentMask = Alignment - 1,
        /// Entries of Size that spill over the last full multiple of Alignment.
        MaskedSize = Size & AlignmentMask,
        /// Entries needed to fill up the spill-over to a full vector
        /// (equals Alignment when there is no spill-over; unused in that case).
        Padding = Alignment - MaskedSize,
        /// Size rounded up to the next multiple of Alignment.
        PaddedSize = Size + (MaskedSize == 0 ? 0 : Padding)
    };
};
55 
66 template <typename V, size_t Size1, size_t Size2, bool InitPadding>
67 class Memory : public MemoryBase<V, Memory<V, Size1, Size2, InitPadding>, 2,
68  Memory<V, Size2, 0, InitPadding>>
69 {
70 public:
71  typedef typename V::EntryType EntryType;
72 
73 private:
76  friend class MemoryBase<V, Memory<V, Size1, Size2, InitPadding>, 2, RowMemory>;
77  friend class MemoryDimensionBase<V, Memory<V, Size1, Size2, InitPadding>, 2,
78  RowMemory>;
79  enum : size_t {
80  Alignment = V::MemoryAlignment,
81  PaddedSize2 = _MemorySizeCalculation<V, Size2>::PaddedSize
82  };
83  alignas(static_cast<size_t>(Alignment)) // GCC complains about 'is not an
84  // integer constant' unless the
85  // static_cast is present
86  RowMemory m_mem[Size1];
87 
88  public:
89  using Base::vector;
90  enum Constants {
91  RowCount = Size1,
92  VectorsCount = PaddedSize2 / V::Size
93  };
94 
95  Memory() = default;
96 
102  static constexpr size_t rowsCount() { return RowCount; }
111  static constexpr size_t entriesCount() { return Size1 * Size2; }
117  static constexpr size_t vectorsCount() { return VectorsCount * Size1; }
118 
128  template<typename Parent, typename RM>
129  Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase<V, Parent, 2, RM> &rhs) {
130  assert(vectorsCount() == rhs.vectorsCount());
131  Detail::copyVectors(*this, rhs);
132  return *this;
133  }
134 
135  Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) {
136  Detail::copyVectors(*this, rhs);
137  return *this;
138  }
139 
147  inline Memory &operator=(const V &v) {
148  for (size_t i = 0; i < vectorsCount(); ++i) {
149  vector(i) = v;
150  }
151  return *this;
152  }
153 };
154 
198 template <typename V, size_t Size, bool InitPadding>
199 class Memory<V, Size, 0u, InitPadding> :
200  public MemoryBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>
201  {
202  public:
203  typedef typename V::EntryType EntryType;
204  private:
206  friend class MemoryBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>;
207  friend class MemoryDimensionBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>;
208  enum : size_t {
209  Alignment = V::MemoryAlignment, // in Bytes
210  MaskedSize = Size & (V::Size - 1), // the fraction of Size that exceeds
211  // an integral multiple of V::Size
212  Padding = V::Size - MaskedSize,
213  PaddedSize = MaskedSize == 0 ? Size : Size + Padding
214  };
215  alignas(static_cast<size_t>(Alignment)) // GCC complains about 'is not an
216  // integer constant' unless the
217  // static_cast is present
218  EntryType m_mem[PaddedSize];
219 
220  public:
221  using Base::vector;
222  enum Constants {
223  EntriesCount = Size,
224  VectorsCount = PaddedSize / V::Size
225  };
226 
227  Memory()
228  {
229  if (InitPadding) {
230  Base::lastVector() = V::Zero();
231  }
232  }
233 
234  Memory(std::initializer_list<EntryType> init)
235  {
236  Vc_ASSERT(init.size() <= Size);
237  Base::lastVector() = V::Zero();
238  std::copy(init.begin(), init.end(), &m_mem[0]);
239  }
240 
263  static Vc_ALWAYS_INLINE Vc_CONST Memory<V, Size, 0u, false> &fromRawData(EntryType *ptr)
264  {
265  // DANGER! This placement new has to use the right address. If the compiler decides
266  // RowMemory requires padding before the actual data then the address has to be adjusted
267  // accordingly
268  char *addr = reinterpret_cast<char *>(ptr);
269  typedef Memory<V, Size, 0u, false> MM;
270  addr -= offsetof(MM, m_mem);
271  return *new(addr) MM;
272  }
273 
279  static constexpr size_t entriesCount() { return EntriesCount; }
280 
286  static constexpr size_t vectorsCount() { return VectorsCount; }
287 
288  inline Memory(const Memory &rhs)
289  {
290  Detail::copyVectors(*this, rhs);
291  }
292 
293  template <size_t S> inline Memory(const Memory<V, S> &rhs)
294  {
295  assert(vectorsCount() == rhs.vectorsCount());
296  Detail::copyVectors(*this, rhs);
297  }
298 
299  inline Memory &operator=(const Memory &rhs)
300  {
301  Detail::copyVectors(*this, rhs);
302  return *this;
303  }
304 
305  template <size_t S> inline Memory &operator=(const Memory<V, S> &rhs)
306  {
307  assert(vectorsCount() == rhs.vectorsCount());
308  Detail::copyVectors(*this, rhs);
309  return *this;
310  }
311 
312  Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) {
313  std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType));
314  return *this;
315  }
316  inline Memory &operator=(const V &v) {
317  for (size_t i = 0; i < vectorsCount(); ++i) {
318  vector(i) = v;
319  }
320  return *this;
321  }
322  };
323 
364  template<typename V> class Memory<V, 0u, 0u, true> : public MemoryBase<V, Memory<V, 0u, 0u, true>, 1, void>
365  {
366  public:
367  typedef typename V::EntryType EntryType;
368  private:
369  typedef MemoryBase<V, Memory<V>, 1, void> Base;
370  friend class MemoryBase<V, Memory<V>, 1, void>;
371  friend class MemoryDimensionBase<V, Memory<V>, 1, void>;
372  enum InternalConstants {
373  Alignment = V::Size,
374  AlignmentMask = Alignment - 1
375  };
376  size_t m_entriesCount;
377  size_t m_vectorsCount;
378  EntryType *m_mem;
379  size_t calcPaddedEntriesCount(size_t x)
380  {
381  size_t masked = x & AlignmentMask;
382  return (masked == 0 ? x : x + (Alignment - masked));
383  }
384  public:
385  using Base::vector;
386 
394  Vc_ALWAYS_INLINE Memory(size_t size)
395  : m_entriesCount(size),
396  m_vectorsCount(calcPaddedEntriesCount(m_entriesCount)),
397  m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount))
398  {
399  m_vectorsCount /= V::Size;
400  Base::lastVector() = V::Zero();
401  }
402 
410  template<typename Parent, typename RM>
411  Vc_ALWAYS_INLINE Memory(const MemoryBase<V, Parent, 1, RM> &rhs)
412  : m_entriesCount(rhs.entriesCount()),
413  m_vectorsCount(rhs.vectorsCount()),
414  m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount * V::Size))
415  {
416  Detail::copyVectors(*this, rhs);
417  }
418 
426  Vc_ALWAYS_INLINE Memory(const Memory &rhs)
427  : m_entriesCount(rhs.entriesCount()),
428  m_vectorsCount(rhs.vectorsCount()),
429  m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount * V::Size))
430  {
431  Detail::copyVectors(*this, rhs);
432  }
433 
437  Vc_ALWAYS_INLINE ~Memory()
438  {
439  Vc::free(m_mem);
440  }
441 
447  inline void swap(Memory &rhs) {
448  std::swap(m_mem, rhs.m_mem);
449  std::swap(m_entriesCount, rhs.m_entriesCount);
450  std::swap(m_vectorsCount, rhs.m_vectorsCount);
451  }
452 
456  Vc_ALWAYS_INLINE Vc_PURE size_t entriesCount() const { return m_entriesCount; }
457 
461  Vc_ALWAYS_INLINE Vc_PURE size_t vectorsCount() const { return m_vectorsCount; }
462 
472  template<typename Parent, typename RM>
473  Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase<V, Parent, 1, RM> &rhs) {
474  assert(vectorsCount() == rhs.vectorsCount());
475  Detail::copyVectors(*this, rhs);
476  return *this;
477  }
478 
479  Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) {
480  assert(vectorsCount() == rhs.vectorsCount());
481  Detail::copyVectors(*this, rhs);
482  return *this;
483  }
484 
494  Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) {
495  std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType));
496  return *this;
497  }
498 };
499 
510 Vc_ALWAYS_INLINE void prefetchForOneRead(const void *addr)
511 {
512  Vc::Detail::prefetchForOneRead(addr, VectorAbi::Best<float>());
513 }
514 
527 Vc_ALWAYS_INLINE void prefetchForModify(const void *addr)
528 {
529  Vc::Detail::prefetchForModify(addr, VectorAbi::Best<float>());
530 }
531 
542 Vc_ALWAYS_INLINE void prefetchClose(const void *addr)
543 {
544  Vc::Detail::prefetchClose(addr, VectorAbi::Best<float>());
545 }
546 
557 Vc_ALWAYS_INLINE void prefetchMid(const void *addr)
558 {
559  Vc::Detail::prefetchMid(addr, VectorAbi::Best<float>());
560 }
561 
572 Vc_ALWAYS_INLINE void prefetchFar(const void *addr)
573 {
574  Vc::Detail::prefetchFar(addr, VectorAbi::Best<float>());
575 }
576 } // namespace Common
577 
578 using Common::Memory;
582 using Common::prefetchMid;
583 using Common::prefetchFar;
584 } // namespace Vc
585 
586 namespace std
587 {
588  template<typename V> Vc_ALWAYS_INLINE void swap(Vc::Memory<V> &a, Vc::Memory<V> &b) { a.swap(b); }
589 } // namespace std
590 
591 #endif // VC_COMMON_MEMORY_H_
Common interface to all Memory classes, independent of allocation on the stack or heap.
Definition: memorybase.h:360
size_t vectorsCount() const
Definition: memorybase.h:389
Memory(const MemoryBase< V, Parent, 1, RM > &rhs)
Copy the memory into a new memory area.
Definition: memory.h:411
Memory(const Memory &rhs)
Overload of the above function.
Definition: memory.h:426
Memory & operator=(const MemoryBase< V, Parent, 1, RM > &rhs)
Overwrite all entries with the values stored in rhs.
Definition: memory.h:473
~Memory()
Frees the memory which was allocated in the constructor.
Definition: memory.h:437
void swap(Memory &rhs)
Swap the contents and size information of two Memory objects.
Definition: memory.h:447
Memory & operator=(const EntryType *rhs)
Overwrite all entries with the values stored in the memory at rhs.
Definition: memory.h:494
Memory(size_t size)
Allocate enough memory to access size values of type V::EntryType.
Definition: memory.h:394
static constexpr size_t entriesCount()
Definition: memory.h:279
static Memory< V, Size, 0u, false > & fromRawData(EntryType *ptr)
Wrap existing data with the Memory convenience class.
Definition: memory.h:263
static constexpr size_t vectorsCount()
Definition: memory.h:286
A helper class for fixed-size two-dimensional arrays.
Definition: memory.h:69
static constexpr size_t entriesCount()
Definition: memory.h:111
static constexpr size_t rowsCount()
Definition: memory.h:102
static constexpr size_t vectorsCount()
Definition: memory.h:117
Memory & operator=(const MemoryBase< V, Parent, 2, RM > &rhs)
Copies the data from a different object.
Definition: memory.h:129
Memory & operator=(const V &v)
Initialize all data with the given vector.
Definition: memory.h:147
Common::AdaptSubscriptOperator< std::vector< T, Allocator > > vector
An adapted std::vector container with an additional subscript operator which implements gather and scatter operations.
Definition: vector:55
void swap(Adapter< S, T, N > &a, std::size_t i, S &x)
Swaps one scalar object x with a SIMD slot at offset i in the simdized object a.
Definition: simdize.h:1108
void prefetchForModify(const void *addr)
Prefetch the cacheline containing addr for modification.
Definition: memory.h:527
T * malloc(size_t n)
Allocates memory on the Heap with alignment and padding suitable for vectorized access.
Definition: malloc.h:136
void prefetchClose(const void *addr)
Prefetch the cacheline containing addr to L1 cache.
Definition: memory.h:542
void prefetchFar(const void *addr)
Prefetch the cacheline containing addr to L3 cache.
Definition: memory.h:572
void prefetchMid(const void *addr)
Prefetch the cacheline containing addr to L2 cache.
Definition: memory.h:557
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/false.
Definition: types.h:81
void prefetchForOneRead(const void *addr)
Prefetch the cacheline containing addr for a single read access.
Definition: memory.h:510
void free(T *p)
Frees memory that was allocated with Vc::malloc.
Definition: malloc.h:163
@ AlignOnVector
Align on boundary of vector sizes (e.g. 16 Bytes on SSE platforms).
Definition: global.h:458
constexpr std::size_t MemoryAlignment
Specifies the most conservative memory alignment necessary for aligned loads and stores of Vector types.
Definition: vector.h:215
Vector Classes Namespace.
Definition: dox.h:585