YAKL
YAKL_intrinsics_transpose.h
Go to the documentation of this file.
1 
2 #pragma once
3 // Included by YAKL_intrinsics.h
4 
6 namespace yakl {
7  namespace intrinsics {
8 
// transpose for a rank-2 yakl::Array: produces an array `out` with out(j,i) = in(i,j).
// NOTE(review): the declaration line (listing line 10) is absent from this extract; the
// cross-reference list further down this page gives it as
//   Array<T,2,myMem,myStyle> transpose(Array<T,2,myMem,myStyle> const &in, Stream stream=Stream())
//   in     : array to transpose; only read here.
//   stream : handed to createDeviceCopy() and to the kernel launch configuration on the
//            non-host paths; the host paths never reference it.
//   return : `out`, built from a copy of `in` reshaped to the swapped extents/bounds.
9  template <class T, int myMem, int myStyle>
// Debug builds only (YAKL_DEBUG defined): report an unallocated input through yakl_throw.
11  #ifdef YAKL_DEBUG
12  if (!allocated(in)) yakl_throw("ERROR: Calling transpose on unallocated array");
13  #endif
// C-style arrays: dimensions are queried as 0 and 1, and indices run from 0 to extent-1.
14  if constexpr (myStyle == styleC) {
15  auto d0 = size(in,0);
16  auto d1 = size(in,1);
17  if constexpr (myMem == memHost) {
// Host memory: copy `in`, reshape the copy to (d1,d0), then overwrite it with serial loops.
18  auto out = in.createHostCopy().template reshape<2>( { d1 , d0 } );
19  for (int i=0; i < d0; i++) {
20  for (int j=0; j < d1; j++) {
21  out(j,i) = in(i,j);
22  }
23  }
24  return out;
25  } else {
// Any other memory space: same copy + reshape, but the element swap runs as a kernel on `stream`.
26  auto out = in.createDeviceCopy(stream).template reshape<2>( { d1 , d0 } );
27  c::parallel_for( "YAKL_internal_transpose" , c::Bounds<2>(d0,d1) , YAKL_LAMBDA (int i, int j) {
28  out(j,i) = in(i,j);
29  } , DefaultLaunchConfig().set_stream(stream) );
// parallel_for is listed as [ASYNCHRONOUS] in the cross-references below.
// NOTE(review): presumably add_stream_dependency ties later use of `out` to `stream` - confirm.
30  out.add_stream_dependency(stream);
31  return out;
32  }
33  } else {
// Every style other than styleC (presumably styleFortran - confirm): dimensions are queried
// as 1 and 2, and the loops run over the inclusive ranges [l1,u1] x [l2,u2].
34  auto l1 = lbound(in,1);
35  auto l2 = lbound(in,2);
36  auto u1 = ubound(in,1);
37  auto u2 = ubound(in,2);
38  if constexpr (myMem == memHost) {
// Host memory: the copy is reshaped so that the bounds of the two dimensions are exchanged.
39  auto out = in.createHostCopy().template reshape<2>( { {l2,u2} , {l1,u1} } );
40  for (int i=l1; i <= u1; i++) {
41  for (int j=l2; j <= u2; j++) {
42  out(j,i) = in(i,j);
43  }
44  }
45  return out;
46  } else {
// Any other memory space: same as above, with the swap launched through fortran::parallel_for on `stream`.
47  auto out = in.createDeviceCopy(stream).template reshape<2>( { {l2,u2} , {l1,u1} } );
48  fortran::parallel_for( "YAKL_internal_transpose" , fortran::Bounds<2>({l1,u1},{l2,u2}) , YAKL_LAMBDA (int i, int j) {
49  out(j,i) = in(i,j);
50  } , DefaultLaunchConfig().set_stream(stream) );
51  out.add_stream_dependency(stream);
52  return out;
53  }
54  }
55  }
56 
// transpose overload parameterised on two compile-time extents n1, n2 and element type T.
// NOTE(review): listing lines 58-59 (the signature and the declaration of `ret`) are absent
// from this extract; presumably this is the yakl::CSArray overload - confirm against the header.
// Fills ret(j,i) = a(i,j) for j in [0,n1) and i in [0,n2), then returns `ret` by value.
// NOTE(review): which of n1/n2 is the first extent of `a` and of `ret` cannot be checked
// from this listing; verify the loop limits for non-square shapes.
57  template <unsigned int n1, unsigned int n2, class T>
// The loop counters are int while n1 and n2 are unsigned int, so the comparisons mix signedness.
60  for (int j=0; j < n1; j++) {
61  for (int i=0; i < n2; i++) {
62  ret(j,i) = a(i,j);
63  }
64  }
65  return ret;
66  }
67 
// transpose overload parameterised on two bound types B1, B2 and element type T.
// NOTE(review): listing lines 69-70 (the signature and the declaration of `ret`) are absent
// from this extract; presumably this is the yakl::FSArray overload - confirm against the header.
// B1 and B2 each supply static lower() and upper(); both loops include the upper bound.
// Fills ret(j,i) = a(i,j) for j in [B1::lower(),B1::upper()] and i in [B2::lower(),B2::upper()],
// then returns `ret` by value.
68  template <class B1, class B2, class T>
71  for (int j=B1::lower(); j <= B1::upper(); j++) {
72  for (int i=B2::lower(); i <= B2::upper(); i++) {
73  ret(j,i) = a(i,j);
74  }
75  }
76  return ret;
77  }
78 
79  }
80 }
82 
yakl::intrinsics::size
YAKL_INLINE int size(T const &arr, int dim)
Definition: YAKL_intrinsics_size.h:9
yakl::Stream
Implements the functionality of a stream for parallel kernel execution. If the Stream::create() metho...
Definition: YAKL_streams_events.h:394
yakl::intrinsics::transpose
Array< T, 2, myMem, myStyle > transpose(Array< T, 2, myMem, myStyle > const &in, Stream stream=Stream())
Definition: YAKL_intrinsics_transpose.h:10
yakl::c::Bounds
Describes a set of C-style tightly-nested loops.
Definition: YAKL_Bounds_c.h:84
yakl::intrinsics::allocated
YAKL_INLINE bool allocated(T const &arr)
Definition: YAKL_intrinsics_allocated.h:9
yakl::c::parallel_for
void parallel_for(char const *str, Bounds< N, simple > const &bounds, F const &f, LaunchConfig< VecLen, B4B > config=LaunchConfig<>())
[ASYNCHRONOUS] Launch the passed functor in parallel.
__YAKL_NAMESPACE_WRAPPER_END__
#define __YAKL_NAMESPACE_WRAPPER_END__
Definition: YAKL.h:20
yakl::fortran::parallel_for
void parallel_for(char const *str, Bounds< N, simple > const &bounds, F const &f, LaunchConfig< VecLen, B4B > config=LaunchConfig<>())
[ASYNCHRONOUS] Launch the passed functor in parallel.
__YAKL_NAMESPACE_WRAPPER_BEGIN__
#define __YAKL_NAMESPACE_WRAPPER_BEGIN__
Definition: YAKL.h:19
yakl::DefaultLaunchConfig
LaunchConfig<> DefaultLaunchConfig
This launch configuration sets vector length to the device default and B4B to false.
Definition: YAKL_LaunchConfig.h:77
YAKL_INLINE
#define YAKL_INLINE
Used to decorate functions called from kernels (parallel_for and parallel_outer) or from CPU function...
Definition: YAKL_defines.h:140
yakl::intrinsics::ubound
YAKL_INLINE int ubound(T const &arr, int dim)
Definition: YAKL_intrinsics_ubound.h:9
yakl::intrinsics::lbound
YAKL_INLINE int lbound(T const &arr, int dim)
Definition: YAKL_intrinsics_lbound.h:9
yakl::yakl_throw
YAKL_INLINE void yakl_throw(const char *msg)
Throw an error message. Works from the host or device.
Definition: YAKL_error.h:17
yakl::styleC
constexpr int styleC
Template parameter for yakl::Array that specifies it should follow C-style behavior.
Definition: YAKL_Array.h:20
yakl::Array
This declares the yakl::Array class. Please see the yakl::styleC and yakl::styleFortran template spec...
Definition: YAKL_Array.h:40
yakl::CSArray
C-style array on the stack similar in nature to, e.g., float arr[ny][nx];
Definition: YAKL_CSArray.h:30
yakl::fortran::Bounds
Describes a set of Fortran-style tightly-nested loops.
Definition: YAKL_Bounds_fortran.h:79
yakl
YAKL_LAMBDA
#define YAKL_LAMBDA
Used to create C++ lambda expressions passed to parallel_for and parallel_outer
Definition: YAKL_defines.h:128
yakl::FSArray
Fortran-style array on the stack similar in nature to, e.g., float arr[ny][nx];
Definition: YAKL_FSArray.h:53
yakl::memHost
constexpr int memHost
Specifies a host memory address space for a yakl::Array object.
Definition: YAKL_memory_spaces.h:15