Go to the documentation of this file.
9 template <
class T,
int myMem,
int myStyle>
14 if constexpr (myStyle ==
styleC) {
17 if constexpr (myMem ==
memHost) {
18 auto out = in.createHostCopy().template reshape<2>( { d1 , d0 } );
19 for (
int i=0; i < d0; i++) {
20 for (
int j=0; j < d1; j++) {
26 auto out = in.createDeviceCopy(stream).template reshape<2>( { d1 , d0 } );
30 out.add_stream_dependency(stream);
38 if constexpr (myMem ==
memHost) {
39 auto out = in.createHostCopy().template reshape<2>( { {l2,u2} , {l1,u1} } );
40 for (
int i=l1; i <= u1; i++) {
41 for (
int j=l2; j <= u2; j++) {
47 auto out = in.createDeviceCopy(stream).template reshape<2>( { {l2,u2} , {l1,u1} } );
51 out.add_stream_dependency(stream);
57 template <
unsigned int n1,
unsigned int n2,
class T>
60 for (
int j=0; j < n1; j++) {
61 for (
int i=0; i < n2; i++) {
68 template <
class B1,
class B2,
class T>
71 for (
int j=B1::lower(); j <= B1::upper(); j++) {
72 for (
int i=B2::lower(); i <= B2::upper(); i++) {
YAKL_INLINE int size(T const &arr, int dim)
Definition: YAKL_intrinsics_size.h:9
Implements the functionality of a stream for parallel kernel execution. If the Stream::create() metho...
Definition: YAKL_streams_events.h:394
Array< T, 2, myMem, myStyle > transpose(Array< T, 2, myMem, myStyle > const &in, Stream stream=Stream())
Definition: YAKL_intrinsics_transpose.h:10
Describes a set of C-style tightly-nested loops.
Definition: YAKL_Bounds_c.h:84
YAKL_INLINE bool allocated(T const &arr)
Definition: YAKL_intrinsics_allocated.h:9
void parallel_for(char const *str, Bounds< N, simple > const &bounds, F const &f, LaunchConfig< VecLen, B4B > config=LaunchConfig<>())
[ASYNCHRONOUS] Launch the passed functor in parallel.
#define __YAKL_NAMESPACE_WRAPPER_END__
Definition: YAKL.h:20
void parallel_for(char const *str, Bounds< N, simple > const &bounds, F const &f, LaunchConfig< VecLen, B4B > config=LaunchConfig<>())
[ASYNCHRONOUS] Launch the passed functor in parallel.
#define __YAKL_NAMESPACE_WRAPPER_BEGIN__
Definition: YAKL.h:19
LaunchConfig<> DefaultLaunchConfig
This launch configuration sets vector length to the device default and B4B to false.
Definition: YAKL_LaunchConfig.h:77
#define YAKL_INLINE
Used to decorate functions called from kernels (parallel_for and parallel_outer) or from CPU function...
Definition: YAKL_defines.h:140
YAKL_INLINE int ubound(T const &arr, int dim)
Definition: YAKL_intrinsics_ubound.h:9
YAKL_INLINE int lbound(T const &arr, int dim)
Definition: YAKL_intrinsics_lbound.h:9
YAKL_INLINE void yakl_throw(const char *msg)
Throw an error message. Works from the host or device.
Definition: YAKL_error.h:17
constexpr int styleC
Template parameter for yakl::Array that specifies it should follow C-style behavior.
Definition: YAKL_Array.h:20
This declares the yakl::Array class. Please see the yakl::styleC and yakl::styleFortran template spec...
Definition: YAKL_Array.h:40
C-style array on the stack similar in nature to, e.g., float arr[ny][nx];
Definition: YAKL_CSArray.h:30
Describes a set of Fortran-style tightly-nested loops.
Definition: YAKL_Bounds_fortran.h:79
#define YAKL_LAMBDA
Used to create C++ lambda expressions passed to parallel_for and parallel_outer
Definition: YAKL_defines.h:128
Fortran-style array on the stack similar in nature to, e.g., float arr[ny][nx];
Definition: YAKL_FSArray.h:53
constexpr int memHost
Specifies a host memory address space for a yakl::Array object.
Definition: YAKL_memory_spaces.h:15