YAKL
YAKL_simd.h
Go to the documentation of this file.
1 
6 #pragma once
7 // Included by YAKL.h
8 
// GET_SIMD_PRAGMA() expands to the compiler-specific loop hint placed directly in front of a
// `for` loop, or to nothing when the compiler is not recognised.
// Vendor compilers are tested first because they may also define __GNUG__ or __clang__;
// testing them up front selects the same branch as negating them in every other condition.
#if defined(__INTEL_COMPILER) || defined(__NVCOMPILER)
# define GET_SIMD_PRAGMA() _Pragma("ivdep")
#elif defined(__clang__)
# define GET_SIMD_PRAGMA() _Pragma("clang loop vectorize(enable)")
#elif defined(__GNUG__)
# define GET_SIMD_PRAGMA() _Pragma("GCC ivdep")
#else
# define GET_SIMD_PRAGMA()
#endif
18 
20 namespace yakl {
21 
23 namespace simd {
24 
25 
39  template <class T, unsigned int N>
40  class Pack {
41  public:
43  T myData[N];
44 
47  #ifdef YAKL_DEBUG
48  if (i >= N) { yakl_throw("Pack index out of bounds"); }
49  #endif
50  return myData[i];
51  }
52 
55  #ifdef YAKL_DEBUG
56  if (i >= N) { yakl_throw("Pack index out of bounds"); }
57  #endif
58  return myData[i];
59  }
60 
62  YAKL_INLINE static int constexpr get_pack_size() { return N; }
63 
65  // SELF OPERATORS WITH SCALAR VALUES
67  template <class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,bool>::type = false >
70  for (int i=0 ; i < N ; i++) { (*this)(i) = rhs; }
71  return *this;
72  }
73 
74  template <class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,bool>::type = false >
77  for (uint i=0; i < N; i++) { (*this)(i) += rhs; }
78  return *this;
79  }
80 
81  template <class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,bool>::type = false >
84  for (uint i=0; i < N; i++) { (*this)(i) -= rhs; }
85  return *this;
86  }
87 
88  template <class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,bool>::type = false >
91  for (uint i=0; i < N; i++) { (*this)(i) *= rhs; }
92  return *this;
93  }
94 
95  template <class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,bool>::type = false >
98  for (uint i=0; i < N; i++) { (*this)(i) /= rhs; }
99  return *this;
100  }
101 
103  // SELF OPERATORS WITH PACKS
105  template <class TLOC>
108  for (uint i=0; i < N; i++) { (*this)(i) += rhs(i); }
109  return *this;
110  }
111 
112  template <class TLOC>
115  for (uint i=0; i < N; i++) { (*this)(i) -= rhs(i); }
116  return *this;
117  }
118 
119  template <class TLOC>
122  for (uint i=0; i < N; i++) { (*this)(i) *= rhs(i); }
123  return *this;
124  }
125 
126  template <class TLOC>
129  for (uint i=0; i < N; i++) { (*this)(i) /= rhs(i); }
130  return *this;
131  }
132 
133 
135  inline friend std::ostream &operator<<(std::ostream& os, Pack<T,N> const &v) {
136  for (uint i=0; i<N; i++) { os << std::setw(12) << v(i) << " "; }
137  os << "\n";
138  return os;
139  }
140  };
141 
142 
/** @brief Empty tag type carrying a pack size `N` and a `SIMD` flag as template parameters.
  *        It holds no data; its only information is in its type. */
template <unsigned int N, bool SIMD=false> struct PackIterConfig {};
150 
151 
152 
163  template <class F, unsigned int N, bool SIMD=false>
165  if constexpr (SIMD) {
167  for (int i=0 ; i < N ; i++) { f(i); }
168  } else {
169  for (int i=0 ; i < N ; i++) { f(i); }
170  }
171  }
172 
173 
175  // OPERATIONS WITH SCALARS
177  template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,bool>::type = false >
179  Pack<T,N> ret;
181  for (uint i=0; i < N; i++) { ret(i) = lhs(i) + val; }
182  return ret;
183  }
184  template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,bool>::type = false >
186  Pack<T,N> ret;
188  for (uint i=0; i < N; i++) { ret(i) = val + rhs(i); }
189  return ret;
190  }
191 
192  template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,bool>::type = false >
194  Pack<T,N> ret;
196  for (uint i=0; i < N; i++) { ret(i) = lhs(i) - val; }
197  return ret;
198  }
199  template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,bool>::type = false >
201  Pack<T,N> ret;
203  for (uint i=0; i < N; i++) { ret(i) = val - rhs(i); }
204  return ret;
205  }
206 
207  template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,bool>::type = false >
209  Pack<T,N> ret;
211  for (uint i=0; i < N; i++) { ret(i) = lhs(i) * val; }
212  return ret;
213  }
214  template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,bool>::type = false >
216  Pack<T,N> ret;
218  for (uint i=0; i < N; i++) { ret(i) = val * rhs(i); }
219  return ret;
220  }
221 
222  template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,bool>::type = false >
224  Pack<T,N> ret;
226  for (uint i=0; i < N; i++) { ret(i) = lhs(i) / val; }
227  return ret;
228  }
229  template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,bool>::type = false >
231  Pack<T,N> ret;
233  for (uint i=0; i < N; i++) { ret(i) = val / rhs(i); }
234  return ret;
235  }
236 
237  template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,bool>::type = false >
238  YAKL_INLINE Pack<T,N> pow(Pack<T,N> lhs , TLOC val) {
239  Pack<T,N> ret;
241  for (uint i=0; i < N; i++) { ret(i) = std::pow( lhs(i) , val ); }
242  return ret;
243  }
244 
245 
247  // UNARY OPERATORS
249  template <class T, unsigned int N>
251  Pack<T,N> ret;
253  for (int i=0; i < N; i++) { ret(i) = -( a(i) ); }
254  return ret;
255  }
256 
257  template <class T, unsigned int N>
259  Pack<T,N> ret;
261  for (int i=0; i < N; i++) { ret(i) = std::sqrt( a(i) ); }
262  return ret;
263  }
264 
265  template <class T, unsigned int N>
267  Pack<T,N> ret;
269  for (int i=0; i < N; i++) { ret(i) = std::abs( a(i) ); }
270  return ret;
271  }
272 
273  template <class T, unsigned int N>
275  Pack<T,N> ret;
277  for (int i=0; i < N; i++) { ret(i) = std::exp( a(i) ); }
278  return ret;
279  }
280 
281  template <class T, unsigned int N>
283  Pack<T,N> ret;
285  for (int i=0; i < N; i++) { ret(i) = std::log( a(i) ); }
286  return ret;
287  }
288 
289  template <class T, unsigned int N>
291  Pack<T,N> ret;
293  for (int i=0; i < N; i++) { ret(i) = std::log10( a(i) ); }
294  return ret;
295  }
296 
297  template <class T, unsigned int N>
299  Pack<T,N> ret;
301  for (int i=0; i < N; i++) { ret(i) = std::cos( a(i) ); }
302  return ret;
303  }
304 
305  template <class T, unsigned int N>
307  Pack<T,N> ret;
309  for (int i=0; i < N; i++) { ret(i) = std::sin( a(i) ); }
310  return ret;
311  }
312 
313  template <class T, unsigned int N>
315  Pack<T,N> ret;
317  for (int i=0; i < N; i++) { ret(i) = std::tan( a(i) ); }
318  return ret;
319  }
320 
321  template <class T, unsigned int N>
323  Pack<T,N> ret;
325  for (int i=0; i < N; i++) { ret(i) = std::acos( a(i) ); }
326  return ret;
327  }
328 
329  template <class T, unsigned int N>
331  Pack<T,N> ret;
333  for (int i=0; i < N; i++) { ret(i) = std::asin( a(i) ); }
334  return ret;
335  }
336 
337  template <class T, unsigned int N>
339  Pack<T,N> ret;
341  for (int i=0; i < N; i++) { ret(i) = std::atan( a(i) ); }
342  return ret;
343  }
344 
345  template <class T, unsigned int N>
347  Pack<T,N> ret;
349  for (int i=0; i < N; i++) { ret(i) = std::ceil( a(i) ); }
350  return ret;
351  }
352 
353  template <class T, unsigned int N>
355  Pack<T,N> ret;
357  for (int i=0; i < N; i++) { ret(i) = std::floor( a(i) ); }
358  return ret;
359  }
360 
361  template <class T, unsigned int N>
363  Pack<T,N> ret;
365  for (int i=0; i < N; i++) { ret(i) = std::round( a(i) ); }
366  return ret;
367  }
368 
369 
370 
372  // BINARY OPERATORS
374  template <class T, unsigned int N>
376  Pack<T,N> ret;
378  for (int i=0; i < N; i++) {
379  ret(i) = a(i) + b(i);
380  }
381  return ret;
382  }
383 
384 
385  template <class T, unsigned int N>
387  Pack<T,N> ret;
389  for (int i=0; i < N; i++) {
390  ret(i) = a(i) - b(i);
391  }
392  return ret;
393  }
394 
395 
396  template <class T, unsigned int N>
398  Pack<T,N> ret;
400  for (int i=0; i < N; i++) {
401  ret(i) = a(i) * b(i);
402  }
403  return ret;
404  }
405 
406 
407  template <class T, unsigned int N>
409  Pack<T,N> ret;
411  for (int i=0; i < N; i++) {
412  ret(i) = a(i) / b(i);
413  }
414  return ret;
415  }
416 
417 
418  template <class T, unsigned int N>
420  Pack<T,N> ret;
422  for (int i=0; i < N; i++) {
423  ret(i) = std::pow( a(i) , b(i) );
424  }
425  return ret;
426  }
427 
428 }
429 
430 }
432 
433 
yakl::simd::Pack::operator<<
friend std::ostream & operator<<(std::ostream &os, Pack< T, N > const &v)
Print the Pack object's values to the given output stream.
Definition: YAKL_simd.h:135
yakl::simd::PackIterConfig
Informs iterate_over_pack of the Pack size and whether to apply a SIMD pragma.
Definition: YAKL_simd.h:149
yakl::simd::operator-
YAKL_INLINE Pack< T, N > operator-(Pack< T, N > lhs, TLOC val)
Definition: YAKL_simd.h:193
yakl::uint
unsigned int uint
Definition: YAKL.h:32
yakl::simd::Pack::operator/=
YAKL_INLINE Pack< T, N > & operator/=(TLOC rhs)
Definition: YAKL_simd.h:96
yakl::simd::log
YAKL_INLINE Pack< T, N > log(Pack< T, N > a)
Definition: YAKL_simd.h:282
yakl::simd::sin
YAKL_INLINE Pack< T, N > sin(Pack< T, N > a)
Definition: YAKL_simd.h:306
__YAKL_NAMESPACE_WRAPPER_END__
#define __YAKL_NAMESPACE_WRAPPER_END__
Definition: YAKL.h:20
yakl::simd::acos
YAKL_INLINE Pack< T, N > acos(Pack< T, N > a)
Definition: YAKL_simd.h:322
yakl::simd::exp
YAKL_INLINE Pack< T, N > exp(Pack< T, N > a)
Definition: YAKL_simd.h:274
yakl::simd::cos
YAKL_INLINE Pack< T, N > cos(Pack< T, N > a)
Definition: YAKL_simd.h:298
yakl::simd::ceil
YAKL_INLINE Pack< T, N > ceil(Pack< T, N > a)
Definition: YAKL_simd.h:346
yakl::simd::atan
YAKL_INLINE Pack< T, N > atan(Pack< T, N > a)
Definition: YAKL_simd.h:338
yakl::simd::pow
YAKL_INLINE Pack< T, N > pow(Pack< T, N > a, Pack< T, N > b)
Definition: YAKL_simd.h:419
__YAKL_NAMESPACE_WRAPPER_BEGIN__
#define __YAKL_NAMESPACE_WRAPPER_BEGIN__
Definition: YAKL.h:19
yakl::simd::sqrt
YAKL_INLINE Pack< T, N > sqrt(Pack< T, N > a)
Definition: YAKL_simd.h:258
yakl::simd::asin
YAKL_INLINE Pack< T, N > asin(Pack< T, N > a)
Definition: YAKL_simd.h:330
yakl::simd::operator/
YAKL_INLINE Pack< T, N > operator/(Pack< T, N > lhs, TLOC val)
Definition: YAKL_simd.h:223
yakl::simd::Pack::get_pack_size
static YAKL_INLINE constexpr int get_pack_size()
Returns the number of elements in the Pack object.
Definition: YAKL_simd.h:62
YAKL_INLINE
#define YAKL_INLINE
Used to decorate functions called from kernels (parallel_for and parallel_outer) or from CPU function...
Definition: YAKL_defines.h:140
yakl::simd::Pack::operator-=
YAKL_INLINE Pack< T, N > & operator-=(TLOC rhs)
Definition: YAKL_simd.h:82
yakl::simd::Pack
The Pack class encourages vectorization by performing operations on Packs of known size.
Definition: YAKL_simd.h:40
yakl::simd::tan
YAKL_INLINE Pack< T, N > tan(Pack< T, N > a)
Definition: YAKL_simd.h:314
yakl::simd::operator*
YAKL_INLINE Pack< T, N > operator*(Pack< T, N > lhs, TLOC val)
Definition: YAKL_simd.h:208
yakl::yakl_throw
YAKL_INLINE void yakl_throw(const char *msg)
Throw an error message. Works from the host or device.
Definition: YAKL_error.h:17
yakl::simd::operator+
YAKL_INLINE Pack< T, N > operator+(Pack< T, N > lhs, TLOC val)
Definition: YAKL_simd.h:178
yakl::simd::round
YAKL_INLINE Pack< T, N > round(Pack< T, N > a)
Definition: YAKL_simd.h:362
yakl
yakl::simd::Pack::operator()
YAKL_INLINE T & operator()(uint i)
Returns a modifiable reference to the data at the requested index.
Definition: YAKL_simd.h:46
yakl::simd::abs
YAKL_INLINE Pack< T, N > abs(Pack< T, N > a)
Definition: YAKL_simd.h:266
yakl::simd::iterate_over_pack
YAKL_INLINE void iterate_over_pack(F const &f, PackIterConfig< N, SIMD > config)
Perform a loop over the number of elements specified by the PackIterConfig object.
Definition: YAKL_simd.h:164
GET_SIMD_PRAGMA
#define GET_SIMD_PRAGMA()
Definition: YAKL_simd.h:16
yakl::simd::Pack::operator=
YAKL_INLINE Pack< T, N > & operator=(TLOC rhs)
Definition: YAKL_simd.h:68
yakl::simd::log10
YAKL_INLINE Pack< T, N > log10(Pack< T, N > a)
Definition: YAKL_simd.h:290
yakl::simd::pow
YAKL_INLINE Pack< T, N > pow(Pack< T, N > lhs, TLOC val)
Definition: YAKL_simd.h:238
yakl::simd::Pack::operator+=
YAKL_INLINE Pack< T, N > & operator+=(TLOC rhs)
Definition: YAKL_simd.h:75
yakl::simd::Pack::operator*=
YAKL_INLINE Pack< T, N > & operator*=(TLOC rhs)
Definition: YAKL_simd.h:89
yakl::simd::floor
YAKL_INLINE Pack< T, N > floor(Pack< T, N > a)
Definition: YAKL_simd.h:354