YAKL
YAKL_intrinsics_merge.h
Go to the documentation of this file.
1 
2 #pragma once
3 // Included by YAKL_intrinsics.h
4 
6 namespace yakl {
7  namespace intrinsics {
8 
/// Scalar selection: yields `t` when `cond` is true and `f` otherwise.
///
/// This overload is enabled only when both T1 and T2 are arithmetic types.
/// The chosen value is converted to the type of the expression `T1()+T2()`.
///
/// @param t     Value returned when `cond` is true.
/// @param f     Value returned when `cond` is false.
/// @param cond  Selector.
/// @return      `t` or `f`, converted to `decltype(T1()+T2())`.
template <class T1, class T2,
          typename std::enable_if<std::is_arithmetic<T1>::value && std::is_arithmetic<T2>::value,bool>::type=false>
YAKL_INLINE decltype(T1()+T2()) merge(T1 const t, T2 const f, bool cond) {
  if (cond) {
    return t;
  }
  return f;
}
12 
13  template <class T1, class T2, int rank, int myStyle>
14  inline Array<decltype(T1()+T2()),rank,memHost,myStyle>
16  Array<T2 ,rank,memHost,myStyle> const & arr_false ,
17  Array<bool,rank,memHost,myStyle> const & mask ) {
18  #ifdef YAKL_DEBUG
19  using yakl::componentwise::operator==;
20  using yakl::componentwise::operator&&;
21  using yakl::componentwise::operator!;
22  if (!allocated(arr_true )) yakl_throw("ERROR: calling merge with arr_true unallocated.");
23  if (!allocated(arr_false)) yakl_throw("ERROR: calling merge with arr_false unallocated.");
24  if (!allocated(mask )) yakl_throw("ERROR: calling merge with mask unallocated.");
25  if (any( !( (shape(arr_true) == shape(arr_false)) && (shape(arr_false) == shape(mask)) ) ))
26  yakl_throw("ERROR: calling merge with array shapes that do not match");
27  #endif
28  Array<decltype(T1()+T2()),rank,memHost,myStyle> ret = arr_true.createHostObject();
29  for (unsigned i=0; i < arr_true.totElems(); i++) {
30  ret.data()[i] = mask.data()[i] ? arr_true.data()[i] : arr_false.data()[i];
31  }
32  return ret;
33  }
34 
35  template <class T1, class T2, int rank, int myStyle>
36  inline Array<decltype(T1()+T2()),rank,memDevice,myStyle>
38  Array<T2 ,rank,memDevice,myStyle> const & arr_false ,
39  Array<bool,rank,memDevice,myStyle> const & mask , Stream stream = Stream() ) {
40  #ifdef YAKL_DEBUG
41  using yakl::componentwise::operator==;
42  using yakl::componentwise::operator&&;
43  using yakl::componentwise::operator!;
44  if (!allocated(arr_true )) yakl_throw("ERROR: calling merge with arr_true unallocated.");
45  if (!allocated(arr_false)) yakl_throw("ERROR: calling merge with arr_false unallocated.");
46  if (!allocated(mask )) yakl_throw("ERROR: calling merge with mask unallocated.");
47  if (any( !( (shape(arr_true) == shape(arr_false)) && (shape(arr_false) == shape(mask)) ) ))
48  yakl_throw("ERROR: calling merge with array shapes that do not match");
49  #endif
50  Array<decltype(T1()+T2()),rank,memDevice,myStyle> ret = arr_true.createDeviceObject();
51  c::parallel_for( "YAKL_internal_merge" , arr_true.totElems() , YAKL_LAMBDA (int i) {
52  ret.data()[i] = mask.data()[i] ? arr_true.data()[i] : arr_false.data()[i];
53  }, DefaultLaunchConfig().set_stream(stream) );
54  ret.add_stream_dependency(stream);
55  return ret;
56  }
57 
58  template <class T1, class T2, int rank, unsigned D0, unsigned D1, unsigned D2, unsigned D3>
59  YAKL_INLINE SArray<decltype(T1()+T2()),rank,D0,D1,D2,D3>
60  merge( SArray<T1 ,rank,D0,D1,D2,D3> const & arr_true ,
61  SArray<T2 ,rank,D0,D1,D2,D3> const & arr_false ,
62  SArray<bool,rank,D0,D1,D2,D3> const & mask ) {
63  SArray<decltype(T1()+T2()),rank,D0,D1,D2,D3> ret;
64  for (unsigned i=0; i < arr_true.totElems(); i++) {
65  ret.data()[i] = mask.data()[i] ? arr_true.data()[i] : arr_false.data()[i];
66  }
67  return ret;
68  }
69 
70  template <class T1, class T2, int rank, class B0, class B1, class B2, class B3>
71  YAKL_INLINE FSArray<decltype(T1()+T2()),rank,B0,B1,B2,B3>
73  FSArray<T2 ,rank,B0,B1,B2,B3> const & arr_false ,
74  FSArray<bool,rank,B0,B1,B2,B3> const & mask ) {
75  FSArray<decltype(T1()+T2()),rank,B0,B1,B2,B3> ret;
76  for (unsigned i=0; i < arr_true.totElems(); i++) {
77  ret.data()[i] = mask.data()[i] ? arr_true.data()[i] : arr_false.data()[i];
78  }
79  return ret;
80  }
81 
82  }
83 }
85 
yakl::memDevice
constexpr int memDevice
Specifies a device memory address space for a yakl::Array object.
Definition: YAKL_memory_spaces.h:13
yakl::FSArray::totElems
static constexpr unsigned totElems()
Get the total number of array elements.
Definition: YAKL_FSArray.h:179
yakl::Stream
Implements the functionality of a stream for parallel kernel execution. If the Stream::create() metho...
Definition: YAKL_streams_events.h:394
yakl::intrinsics::merge
decltype(T1()+T2()) YAKL_INLINE merge(T1 const t, T2 const f, bool cond)
Definition: YAKL_intrinsics_merge.h:11
yakl::intrinsics::shape
YAKL_INLINE auto shape(T const &arr)
Definition: YAKL_intrinsics_shape.h:9
yakl::intrinsics::allocated
YAKL_INLINE bool allocated(T const &arr)
Definition: YAKL_intrinsics_allocated.h:9
yakl::c::parallel_for
void parallel_for(char const *str, Bounds< N, simple > const &bounds, F const &f, LaunchConfig< VecLen, B4B > config=LaunchConfig<>())
[ASYNCHRONOUS] Launch the passed functor in parallel.
__YAKL_NAMESPACE_WRAPPER_END__
#define __YAKL_NAMESPACE_WRAPPER_END__
Definition: YAKL.h:20
__YAKL_NAMESPACE_WRAPPER_BEGIN__
#define __YAKL_NAMESPACE_WRAPPER_BEGIN__
Definition: YAKL.h:19
yakl::DefaultLaunchConfig
LaunchConfig<> DefaultLaunchConfig
This launch configuration sets vector length to the device default and B4B to false.
Definition: YAKL_LaunchConfig.h:77
YAKL_INLINE
#define YAKL_INLINE
Used to decorate functions called from kernels (parallel_for and parallel_outer) or from CPU function...
Definition: YAKL_defines.h:140
yakl::yakl_throw
YAKL_INLINE void yakl_throw(const char *msg)
Throw an error message. Works from the host or device.
Definition: YAKL_error.h:17
yakl::Array
This declares the yakl::Array class. Please see the yakl::styleC and yakl::styleFortran template spec...
Definition: YAKL_Array.h:40
yakl::CSArray
C-style array on the stack similar in nature to, e.g., float arr[ny][nx];
Definition: YAKL_CSArray.h:30
yakl
yakl::CSArray::totElems
static constexpr unsigned totElems()
Get the total number of array elements.
Definition: YAKL_CSArray.h:131
yakl::CSArray::data
YAKL_INLINE T * data() const
Get the underlying raw data pointer.
Definition: YAKL_CSArray.h:123
yakl::FSArray::data
YAKL_INLINE T * data() const
Get the underlying raw data pointer.
Definition: YAKL_FSArray.h:171
YAKL_LAMBDA
#define YAKL_LAMBDA
Used to create C++ lambda expressions passed to parallel_for and parallel_outer
Definition: YAKL_defines.h:128
yakl::FSArray
Fortran-style array on the stack similar in nature to, e.g., float arr[ny][nx];
Definition: YAKL_FSArray.h:53
yakl::memHost
constexpr int memHost
Specifies a host memory address space for a yakl::Array object.
Definition: YAKL_memory_spaces.h:15
yakl::intrinsics::any
bool any(Array< T, rank, memHost, myStyle > arr)
Definition: YAKL_intrinsics_any.h:10