Go to the documentation of this file.
9 #if defined(__GNUG__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && !defined(__NVCOMPILER)
10 # define GET_SIMD_PRAGMA() _Pragma("GCC ivdep")
11 #elif defined(__clang__) && !defined(__INTEL_COMPILER) && !defined(__NVCOMPILER)
12 # define GET_SIMD_PRAGMA() _Pragma("clang loop vectorize(enable)")
13 #elif defined(__INTEL_COMPILER) || defined(__NVCOMPILER)
14 # define GET_SIMD_PRAGMA() _Pragma("ivdep")
16 # define GET_SIMD_PRAGMA()
39 template <
class T,
unsigned int N>
48 if (i >= N) {
yakl_throw(
"Pack index out of bounds"); }
56 if (i >= N) {
yakl_throw(
"Pack index out of bounds"); }
67 template <class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,
bool>::type =
false >
70 for (
int i=0 ; i < N ; i++) { (*this)(i) = rhs; }
74 template <class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,
bool>::type =
false >
77 for (
uint i=0; i < N; i++) { (*this)(i) += rhs; }
81 template <class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,
bool>::type =
false >
84 for (
uint i=0; i < N; i++) { (*this)(i) -= rhs; }
88 template <class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,
bool>::type =
false >
91 for (
uint i=0; i < N; i++) { (*this)(i) *= rhs; }
95 template <class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,
bool>::type =
false >
98 for (
uint i=0; i < N; i++) { (*this)(i) /= rhs; }
105 template <
class TLOC>
108 for (
uint i=0; i < N; i++) { (*this)(i) += rhs(i); }
112 template <
class TLOC>
115 for (
uint i=0; i < N; i++) { (*this)(i) -= rhs(i); }
119 template <
class TLOC>
122 for (
uint i=0; i < N; i++) { (*this)(i) *= rhs(i); }
126 template <
class TLOC>
129 for (
uint i=0; i < N; i++) { (*this)(i) /= rhs(i); }
136 for (
uint i=0; i<N; i++) { os << std::setw(12) << v(i) <<
" "; }
163 template <
class F,
unsigned int N,
bool SIMD=false>
165 if constexpr (SIMD) {
167 for (
int i=0 ; i < N ; i++) { f(i); }
169 for (
int i=0 ; i < N ; i++) { f(i); }
177 template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,
bool>::type =
false >
181 for (
uint i=0; i < N; i++) { ret(i) = lhs(i) + val; }
184 template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,
bool>::type =
false >
188 for (
uint i=0; i < N; i++) { ret(i) = val + rhs(i); }
192 template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,
bool>::type =
false >
196 for (
uint i=0; i < N; i++) { ret(i) = lhs(i) - val; }
199 template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,
bool>::type =
false >
203 for (
uint i=0; i < N; i++) { ret(i) = val - rhs(i); }
207 template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,
bool>::type =
false >
211 for (
uint i=0; i < N; i++) { ret(i) = lhs(i) * val; }
214 template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,
bool>::type =
false >
218 for (
uint i=0; i < N; i++) { ret(i) = val * rhs(i); }
222 template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,
bool>::type =
false >
226 for (
uint i=0; i < N; i++) { ret(i) = lhs(i) / val; }
229 template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,
bool>::type =
false >
233 for (
uint i=0; i < N; i++) { ret(i) = val / rhs(i); }
237 template <class T, unsigned int N, class TLOC , typename std::enable_if<std::is_arithmetic<TLOC>::value,
bool>::type =
false >
241 for (
uint i=0; i < N; i++) { ret(i) =
std::pow( lhs(i) , val ); }
249 template <
class T,
unsigned int N>
253 for (
int i=0; i < N; i++) { ret(i) = -( a(i) ); }
257 template <
class T,
unsigned int N>
261 for (
int i=0; i < N; i++) { ret(i) =
std::sqrt( a(i) ); }
265 template <
class T,
unsigned int N>
269 for (
int i=0; i < N; i++) { ret(i) =
std::abs( a(i) ); }
273 template <
class T,
unsigned int N>
277 for (
int i=0; i < N; i++) { ret(i) =
std::exp( a(i) ); }
281 template <
class T,
unsigned int N>
285 for (
int i=0; i < N; i++) { ret(i) =
std::log( a(i) ); }
289 template <
class T,
unsigned int N>
293 for (
int i=0; i < N; i++) { ret(i) =
std::log10( a(i) ); }
297 template <
class T,
unsigned int N>
301 for (
int i=0; i < N; i++) { ret(i) =
std::cos( a(i) ); }
305 template <
class T,
unsigned int N>
309 for (
int i=0; i < N; i++) { ret(i) =
std::sin( a(i) ); }
313 template <
class T,
unsigned int N>
317 for (
int i=0; i < N; i++) { ret(i) =
std::tan( a(i) ); }
321 template <
class T,
unsigned int N>
325 for (
int i=0; i < N; i++) { ret(i) =
std::acos( a(i) ); }
329 template <
class T,
unsigned int N>
333 for (
int i=0; i < N; i++) { ret(i) =
std::asin( a(i) ); }
337 template <
class T,
unsigned int N>
341 for (
int i=0; i < N; i++) { ret(i) =
std::atan( a(i) ); }
345 template <
class T,
unsigned int N>
349 for (
int i=0; i < N; i++) { ret(i) =
std::ceil( a(i) ); }
353 template <
class T,
unsigned int N>
357 for (
int i=0; i < N; i++) { ret(i) =
std::floor( a(i) ); }
361 template <
class T,
unsigned int N>
365 for (
int i=0; i < N; i++) { ret(i) =
std::round( a(i) ); }
374 template <
class T,
unsigned int N>
378 for (
int i=0; i < N; i++) {
379 ret(i) = a(i) + b(i);
385 template <
class T,
unsigned int N>
389 for (
int i=0; i < N; i++) {
390 ret(i) = a(i) - b(i);
396 template <
class T,
unsigned int N>
400 for (
int i=0; i < N; i++) {
401 ret(i) = a(i) * b(i);
407 template <
class T,
unsigned int N>
411 for (
int i=0; i < N; i++) {
412 ret(i) = a(i) / b(i);
418 template <
class T,
unsigned int N>
422 for (
int i=0; i < N; i++) {
friend std::ostream & operator<<(std::ostream &os, Pack< T, N > const &v)
Write the Pack object's values to the given output stream.
Definition: YAKL_simd.h:135
Informs iterate_over_pack of the Pack size and whether to apply a SIMD pragma.
Definition: YAKL_simd.h:149
YAKL_INLINE Pack< T, N > operator-(Pack< T, N > lhs, TLOC val)
Definition: YAKL_simd.h:193
unsigned int uint
Definition: YAKL.h:32
YAKL_INLINE Pack< T, N > & operator/=(TLOC rhs)
Definition: YAKL_simd.h:96
YAKL_INLINE Pack< T, N > log(Pack< T, N > a)
Definition: YAKL_simd.h:282
YAKL_INLINE Pack< T, N > sin(Pack< T, N > a)
Definition: YAKL_simd.h:306
#define __YAKL_NAMESPACE_WRAPPER_END__
Definition: YAKL.h:20
YAKL_INLINE Pack< T, N > acos(Pack< T, N > a)
Definition: YAKL_simd.h:322
YAKL_INLINE Pack< T, N > exp(Pack< T, N > a)
Definition: YAKL_simd.h:274
YAKL_INLINE Pack< T, N > cos(Pack< T, N > a)
Definition: YAKL_simd.h:298
YAKL_INLINE Pack< T, N > ceil(Pack< T, N > a)
Definition: YAKL_simd.h:346
YAKL_INLINE Pack< T, N > atan(Pack< T, N > a)
Definition: YAKL_simd.h:338
YAKL_INLINE Pack< T, N > pow(Pack< T, N > a, Pack< T, N > b)
Definition: YAKL_simd.h:419
#define __YAKL_NAMESPACE_WRAPPER_BEGIN__
Definition: YAKL.h:19
YAKL_INLINE Pack< T, N > sqrt(Pack< T, N > a)
Definition: YAKL_simd.h:258
YAKL_INLINE Pack< T, N > asin(Pack< T, N > a)
Definition: YAKL_simd.h:330
YAKL_INLINE Pack< T, N > operator/(Pack< T, N > lhs, TLOC val)
Definition: YAKL_simd.h:223
static YAKL_INLINE constexpr int get_pack_size()
Returns the number of elements in the Pack object.
Definition: YAKL_simd.h:62
#define YAKL_INLINE
Used to decorate functions called from kernels (parallel_for and parallel_outer) or from CPU function...
Definition: YAKL_defines.h:140
YAKL_INLINE Pack< T, N > & operator-=(TLOC rhs)
Definition: YAKL_simd.h:82
The Pack class encourages vectorization by performing operations on Packs of known size.
Definition: YAKL_simd.h:40
YAKL_INLINE Pack< T, N > tan(Pack< T, N > a)
Definition: YAKL_simd.h:314
YAKL_INLINE Pack< T, N > operator*(Pack< T, N > lhs, TLOC val)
Definition: YAKL_simd.h:208
YAKL_INLINE void yakl_throw(const char *msg)
Throw an error message. Works from the host or device.
Definition: YAKL_error.h:17
YAKL_INLINE Pack< T, N > operator+(Pack< T, N > lhs, TLOC val)
Definition: YAKL_simd.h:178
YAKL_INLINE Pack< T, N > round(Pack< T, N > a)
Definition: YAKL_simd.h:362
YAKL_INLINE T & operator()(uint i)
Returns a modifiable reference to the data at the requested index.
Definition: YAKL_simd.h:46
YAKL_INLINE Pack< T, N > abs(Pack< T, N > a)
Definition: YAKL_simd.h:266
YAKL_INLINE void iterate_over_pack(F const &f, PackIterConfig< N, SIMD > config)
Perform a loop over the number of elements specified by the PackIterConfig object.
Definition: YAKL_simd.h:164
#define GET_SIMD_PRAGMA()
Definition: YAKL_simd.h:16
YAKL_INLINE Pack< T, N > & operator=(TLOC rhs)
Definition: YAKL_simd.h:68
YAKL_INLINE Pack< T, N > log10(Pack< T, N > a)
Definition: YAKL_simd.h:290
YAKL_INLINE Pack< T, N > pow(Pack< T, N > lhs, TLOC val)
Definition: YAKL_simd.h:238
YAKL_INLINE Pack< T, N > & operator+=(TLOC rhs)
Definition: YAKL_simd.h:75
YAKL_INLINE Pack< T, N > & operator*=(TLOC rhs)
Definition: YAKL_simd.h:89
YAKL_INLINE Pack< T, N > floor(Pack< T, N > a)
Definition: YAKL_simd.h:354