Vc  1.4.2
SIMD Vector Classes for C++
memory.h
1 /* This file is part of the Vc library. {{{
2 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
3 
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6  * Redistributions of source code must retain the above copyright
7  notice, this list of conditions and the following disclaimer.
8  * Redistributions in binary form must reproduce the above copyright
9  notice, this list of conditions and the following disclaimer in the
10  documentation and/or other materials provided with the distribution.
11  * Neither the names of contributing organizations nor the
12  names of its contributors may be used to endorse or promote products
13  derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 }}}*/
27 
28 #ifndef VC_COMMON_MEMORY_H_
29 #define VC_COMMON_MEMORY_H_
30 
31 #include "memorybase.h"
32 #include <assert.h>
33 #include <algorithm>
34 #include <cstring>
35 #include <cstddef>
36 #include <initializer_list>
37 #include "memoryfwd.h"
38 #include "malloc.h"
39 #include "macros.h"
40 
41 namespace Vc_VERSIONED_NAMESPACE
42 {
43 namespace Common
44 {
/**
 * Compile-time helper that rounds \p Size up to a whole number of vectors.
 *
 * \tparam V    Vector type; only \c V::Size (number of entries per vector) is read.
 * \tparam Size Number of scalar entries that were requested.
 *
 * The remainder is obtained with a bit mask, so the result is a true
 * "round up to a multiple of V::Size" only when \c V::Size is a power of two.
 */
template <typename V, size_t Size> struct _MemorySizeCalculation {
    enum AlignmentCalculations {
        /// Granularity (in entries) that sizes are rounded up to.
        Alignment = V::Size,
        /// Mask that extracts the entries beyond the last full vector.
        AlignmentMask = V::Size - 1,
        /// Number of entries of \p Size that do not fill a complete vector.
        MaskedSize = Size & (V::Size - 1),
        /// Entries needed to complete the last vector (equals Alignment when MaskedSize is 0).
        Padding = V::Size - MaskedSize,
        /// \p Size itself if it already is a multiple of Alignment, otherwise the next multiple.
        PaddedSize = Size + (MaskedSize == 0 ? 0 : Padding)
    };
};
55 
66 template <typename V, size_t Size1, size_t Size2, bool InitPadding>
67 #ifdef Vc_RECURSIVE_MEMORY
68 class Memory : public MemoryBase<V, Memory<V, Size1, Size2, InitPadding>, 2,
69  Memory<V, Size2, 0, InitPadding>>
70 #else
71 class Memory : public AlignedBase<V::MemoryAlignment>,
72  public MemoryBase<V, Memory<V, Size1, Size2, InitPadding>, 2,
73  Memory<V, Size2, 0, false>>
74 #endif
75 {
76 public:
77  typedef typename V::EntryType EntryType;
78 
79 private:
80 #ifdef Vc_RECURSIVE_MEMORY
82 #else
84 #endif
86  friend class MemoryBase<V, Memory<V, Size1, Size2, InitPadding>, 2, RowMemory>;
87  friend class MemoryDimensionBase<V, Memory<V, Size1, Size2, InitPadding>, 2,
88  RowMemory>;
89  enum : size_t {
90  Alignment = V::MemoryAlignment,
91  PaddedSize2 = _MemorySizeCalculation<V, Size2>::PaddedSize
92  };
93  alignas(static_cast<size_t>(Alignment)) // GCC complains about 'is not an
94  // integer constant' unless the
95  // static_cast is present
96 #ifdef Vc_RECURSIVE_MEMORY
97  RowMemory m_mem[Size1];
98 #else
99  EntryType m_mem[Size1][PaddedSize2];
100 #endif
101 
102  public:
103  using Base::vector;
104  enum Constants {
105  RowCount = Size1,
106  VectorsCount = PaddedSize2 / V::Size
107  };
108 
109 #ifdef Vc_RECURSIVE_MEMORY
110  Memory() = default;
111 #else
112  Memory()
113  {
114  if (InitPadding) {
115  if (Size1 > 32)
116  for (size_t i = 0; i < Size1; ++i) {
117  V::Zero().store(&m_mem[i][PaddedSize2 - V::Size], Vc::Streaming);
118  }
119  }
120  }
121 #endif
122 
128  static constexpr size_t rowsCount() { return RowCount; }
137  static constexpr size_t entriesCount() { return Size1 * Size2; }
143  static constexpr size_t vectorsCount() { return VectorsCount * Size1; }
144 
154  template<typename Parent, typename RM>
155  Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase<V, Parent, 2, RM> &rhs) {
156  assert(vectorsCount() == rhs.vectorsCount());
157  Detail::copyVectors(*this, rhs);
158  return *this;
159  }
160 
161  Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) {
162  Detail::copyVectors(*this, rhs);
163  return *this;
164  }
165 
173  inline Memory &operator=(const V &v) {
174  for (size_t i = 0; i < vectorsCount(); ++i) {
175  vector(i) = v;
176  }
177  return *this;
178  }
179 };
180 
224 template <typename V, size_t Size, bool InitPadding>
225 class Memory<V, Size, 0u, InitPadding> :
226 #ifndef Vc_RECURSIVE_MEMORY
227  public AlignedBase<V::MemoryAlignment>,
228 #endif
229  public MemoryBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>
230  {
231  public:
232  typedef typename V::EntryType EntryType;
233  private:
235  friend class MemoryBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>;
236  friend class MemoryDimensionBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>;
237  enum : size_t {
238  Alignment = V::MemoryAlignment, // in Bytes
239  MaskedSize = Size & (V::Size - 1), // the fraction of Size that exceeds
240  // an integral multiple of V::Size
241  Padding = V::Size - MaskedSize,
242  PaddedSize = MaskedSize == 0 ? Size : Size + Padding
243  };
244  alignas(static_cast<size_t>(Alignment)) // GCC complains about 'is not an
245  // integer constant' unless the
246  // static_cast is present
247  EntryType m_mem[PaddedSize];
248 
249  public:
250  using Base::vector;
251  enum Constants {
252  EntriesCount = Size,
253  VectorsCount = PaddedSize / V::Size
254  };
255 
256  Memory()
257  {
258  if (InitPadding) {
259  Base::lastVector() = V::Zero();
260  }
261  }
262 
263  Memory(std::initializer_list<EntryType> init)
264  {
265  Vc_ASSERT(init.size() <= Size);
266  Base::lastVector() = V::Zero();
267  std::copy(init.begin(), init.end(), &m_mem[0]);
268  }
269 
292  static Vc_ALWAYS_INLINE Vc_CONST Memory<V, Size, 0u, false> &fromRawData(EntryType *ptr)
293  {
294  // DANGER! This placement new has to use the right address. If the compiler decides
295  // RowMemory requires padding before the actual data then the address has to be adjusted
296  // accordingly
297  char *addr = reinterpret_cast<char *>(ptr);
298  typedef Memory<V, Size, 0u, false> MM;
299  addr -= Vc_OFFSETOF(MM, m_mem);
300  return *new(addr) MM;
301  }
302 
308  static constexpr size_t entriesCount() { return EntriesCount; }
309 
315  static constexpr size_t vectorsCount() { return VectorsCount; }
316 
317  inline Memory(const Memory &rhs)
318  {
319  Detail::copyVectors(*this, rhs);
320  }
321 
322  template <size_t S> inline Memory(const Memory<V, S> &rhs)
323  {
324  assert(vectorsCount() == rhs.vectorsCount());
325  Detail::copyVectors(*this, rhs);
326  }
327 
328  inline Memory &operator=(const Memory &rhs)
329  {
330  Detail::copyVectors(*this, rhs);
331  return *this;
332  }
333 
334  template <size_t S> inline Memory &operator=(const Memory<V, S> &rhs)
335  {
336  assert(vectorsCount() == rhs.vectorsCount());
337  Detail::copyVectors(*this, rhs);
338  return *this;
339  }
340 
341  Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) {
342  std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType));
343  return *this;
344  }
345  inline Memory &operator=(const V &v) {
346  for (size_t i = 0; i < vectorsCount(); ++i) {
347  vector(i) = v;
348  }
349  return *this;
350  }
351  };
352 
393  template<typename V> class Memory<V, 0u, 0u, true> : public MemoryBase<V, Memory<V, 0u, 0u, true>, 1, void>
394  {
395  public:
396  typedef typename V::EntryType EntryType;
397  private:
398  typedef MemoryBase<V, Memory<V>, 1, void> Base;
399  friend class MemoryBase<V, Memory<V>, 1, void>;
400  friend class MemoryDimensionBase<V, Memory<V>, 1, void>;
401  enum InternalConstants {
402  Alignment = V::Size,
403  AlignmentMask = Alignment - 1
404  };
405  size_t m_entriesCount;
406  size_t m_vectorsCount;
407  EntryType *m_mem;
408  size_t calcPaddedEntriesCount(size_t x)
409  {
410  size_t masked = x & AlignmentMask;
411  return (masked == 0 ? x : x + (Alignment - masked));
412  }
413  public:
414  using Base::vector;
415 
423  Vc_ALWAYS_INLINE Memory(size_t size)
424  : m_entriesCount(size),
425  m_vectorsCount(calcPaddedEntriesCount(m_entriesCount)),
426  m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount))
427  {
428  m_vectorsCount /= V::Size;
429  Base::lastVector() = V::Zero();
430  }
431 
439  template<typename Parent, typename RM>
440  Vc_ALWAYS_INLINE Memory(const MemoryBase<V, Parent, 1, RM> &rhs)
441  : m_entriesCount(rhs.entriesCount()),
442  m_vectorsCount(rhs.vectorsCount()),
443  m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount * V::Size))
444  {
445  Detail::copyVectors(*this, rhs);
446  }
447 
455  Vc_ALWAYS_INLINE Memory(const Memory &rhs)
456  : m_entriesCount(rhs.entriesCount()),
457  m_vectorsCount(rhs.vectorsCount()),
458  m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount * V::Size))
459  {
460  Detail::copyVectors(*this, rhs);
461  }
462 
466  Vc_ALWAYS_INLINE ~Memory()
467  {
468  Vc::free(m_mem);
469  }
470 
476  inline void swap(Memory &rhs) {
477  std::swap(m_mem, rhs.m_mem);
478  std::swap(m_entriesCount, rhs.m_entriesCount);
479  std::swap(m_vectorsCount, rhs.m_vectorsCount);
480  }
481 
485  Vc_ALWAYS_INLINE Vc_PURE size_t entriesCount() const { return m_entriesCount; }
486 
490  Vc_ALWAYS_INLINE Vc_PURE size_t vectorsCount() const { return m_vectorsCount; }
491 
501  template<typename Parent, typename RM>
502  Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase<V, Parent, 1, RM> &rhs) {
503  assert(vectorsCount() == rhs.vectorsCount());
504  Detail::copyVectors(*this, rhs);
505  return *this;
506  }
507 
508  Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) {
509  assert(vectorsCount() == rhs.vectorsCount());
510  Detail::copyVectors(*this, rhs);
511  return *this;
512  }
513 
523  Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) {
524  std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType));
525  return *this;
526  }
527 };
528 
539 Vc_ALWAYS_INLINE void prefetchForOneRead(const void *addr)
540 {
541  Vc::Detail::prefetchForOneRead(addr, VectorAbi::Best<float>());
542 }
543 
556 Vc_ALWAYS_INLINE void prefetchForModify(const void *addr)
557 {
558  Vc::Detail::prefetchForModify(addr, VectorAbi::Best<float>());
559 }
560 
571 Vc_ALWAYS_INLINE void prefetchClose(const void *addr)
572 {
573  Vc::Detail::prefetchClose(addr, VectorAbi::Best<float>());
574 }
575 
586 Vc_ALWAYS_INLINE void prefetchMid(const void *addr)
587 {
588  Vc::Detail::prefetchMid(addr, VectorAbi::Best<float>());
589 }
590 
601 Vc_ALWAYS_INLINE void prefetchFar(const void *addr)
602 {
603  Vc::Detail::prefetchFar(addr, VectorAbi::Best<float>());
604 }
605 } // namespace Common
606 
607 using Common::Memory;
611 using Common::prefetchMid;
612 using Common::prefetchFar;
613 } // namespace Vc
614 
615 namespace std
616 {
617  template<typename V> Vc_ALWAYS_INLINE void swap(Vc::Memory<V> &a, Vc::Memory<V> &b) { a.swap(b); }
618 } // namespace std
619 
620 #endif // VC_COMMON_MEMORY_H_
Vc::Common::Memory< V, 0u, 0u, true >::vectorsCount
size_t vectorsCount() const
Definition: memory.h:490
Vc::Common::Memory< V, Size, 0u, InitPadding >::fromRawData
static Memory< V, Size, 0u, false > & fromRawData(EntryType *ptr)
Wrap existing data with the Memory convenience class.
Definition: memory.h:292
Vc::Common::Memory< V, 0u, 0u, true >::Memory
Memory(const Memory &rhs)
Overload of the above function.
Definition: memory.h:455
Vc::free
void free(T *p)
Frees memory that was allocated with Vc::malloc.
Definition: malloc.h:163
Vc::Common::Memory< V, 0u, 0u, true >::operator=
Memory & operator=(const EntryType *rhs)
Overwrite all entries with the values stored in the memory at rhs.
Definition: memory.h:523
Vc::Common::Memory
Definition: memory.h:71
Vc::Common::Memory::operator=
Memory & operator=(const MemoryBase< V, Parent, 2, RM > &rhs)
Copies the data from a different object.
Definition: memory.h:155
Vc::MemoryAlignment
constexpr std::size_t MemoryAlignment
Definition: vector.h:215
Vc::Zero
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/false.
Definition: types.h:81
Vc::Common::Memory< V, 0u, 0u, true >::~Memory
~Memory()
Frees the memory which was allocated in the constructor.
Definition: memory.h:466
Vc::Common::Memory< V, 0u, 0u, true >::Memory
Memory(const MemoryBase< V, Parent, 1, RM > &rhs)
Copy the memory into a new memory area.
Definition: memory.h:440
Vc::Common::prefetchFar
void prefetchFar(const void *addr)
Prefetch the cacheline containing addr to L3 cache.
Definition: memory.h:601
Vc::Common::Memory::entriesCount
static constexpr size_t entriesCount()
Definition: memory.h:137
Vc::Common::Memory< V, 0u, 0u, true >::Memory
Memory(size_t size)
Allocate enough memory to access size values of type V::EntryType.
Definition: memory.h:423
Vc
Vector Classes Namespace.
Definition: dox.h:586
Vc::Common::Memory::operator=
Memory & operator=(const V &v)
Initialize all data with the given vector.
Definition: memory.h:173
Vc::Common::Memory::rowsCount
static constexpr size_t rowsCount()
Definition: memory.h:128
Vc::Common::Memory< V, Size, 0u, InitPadding >::vectorsCount
static constexpr size_t vectorsCount()
Definition: memory.h:315
Vc::Common::prefetchMid
void prefetchMid(const void *addr)
Prefetch the cacheline containing addr to L2 cache.
Definition: memory.h:586
Vc::Common::Memory< V, 0u, 0u, true >::swap
void swap(Memory &rhs)
Swap the contents and size information of two Memory objects.
Definition: memory.h:476
Vc::AlignOnVector
@ AlignOnVector
Align on boundary of vector sizes.
Definition: global.h:453
Vc::Streaming
constexpr StreamingTag Streaming
Use this object for a flags parameter to request streaming loads and stores.
Definition: loadstoreflags.h:206
Vc::Common::prefetchClose
void prefetchClose(const void *addr)
Prefetch the cacheline containing addr to L1 cache.
Definition: memory.h:571
Vc::Common::Memory< V, 0u, 0u, true >::operator=
Memory & operator=(const MemoryBase< V, Parent, 1, RM > &rhs)
Overwrite all entries with the values stored in rhs.
Definition: memory.h:502
Vc::Common::Memory< V, Size, 0u, InitPadding >::entriesCount
static constexpr size_t entriesCount()
Definition: memory.h:308
Vc::AlignedBase
Definition: alignedbase.h:67
Vc::vector
Common::AdaptSubscriptOperator< std::vector< T, Allocator > > vector
Definition: vector:55
Vc::Common::MemoryBase
Common interface to all Memory classes, independent of allocation on the stack or heap.
Definition: memorybase.h:367
Vc::Common::prefetchForOneRead
void prefetchForOneRead(const void *addr)
Prefetch the cacheline containing addr for a single read access.
Definition: memory.h:539
Vc::Common::Memory::vectorsCount
static constexpr size_t vectorsCount()
Definition: memory.h:143
Vc::Common::prefetchForModify
void prefetchForModify(const void *addr)
Prefetch the cacheline containing addr for modification.
Definition: memory.h:556
Vc::Common::MemoryBase::vectorsCount
size_t vectorsCount() const
Definition: memorybase.h:397
Vc::malloc
T * malloc(size_t n)
Allocates memory on the Heap with alignment and padding suitable for vectorized access.
Definition: malloc.h:136
Vc::SimdizeDetail::swap
void swap(Adapter< S, T, N > &a, std::size_t i, S &x)
Swaps one scalar object x with a SIMD slot at offset i in the simdized object a.
Definition: simdize.h:1108
Vc::Common::Memory< V, 0u, 0u, true >::entriesCount
size_t entriesCount() const
Definition: memory.h:485