YAKL
YAKL_intrinsics_count.h
Go to the documentation of this file.
1 
2 #pragma once
3 // Included by YAKL_intrinsics.h
4 
6 namespace yakl {
7  namespace intrinsics {
8 
9  template <int rank, int myStyle>
10  inline int count( Array<bool,rank,memHost,myStyle> const &mask ) {
11  #ifdef YAKL_DEBUG
12  if (!mask.initialized()) { yakl_throw("ERROR: calling count on an array that has not been initialized"); }
13  #endif
14  int numTrue = 0;
15  for (int i=0; i < mask.totElems(); i++) {
16  if (mask.data()[i]) { numTrue++; }
17  }
18  return numTrue;
19  }
20 
21  template <int rank, int myStyle>
22  inline int count( Array<bool,rank,memDevice,myStyle> const &mask , Stream stream = Stream() ) {
23  #ifdef YAKL_DEBUG
24  if (!mask.initialized()) { yakl_throw("ERROR: calling count on an array that has not been initialized"); }
25  #endif
26  auto intarr = mask.template createDeviceObject<int>();
27  c::parallel_for( "YAKL_internal_count" , mask.totElems() , YAKL_LAMBDA (int i) { intarr.data()[i] = mask.data()[i] ? 1 : 0; },
28  DefaultLaunchConfig().set_stream(stream) );
29  return yakl::intrinsics::sum(intarr,stream);
30  }
31 
// Serial count() overload for a mask whose dimension parameters D0..D3 are types.
// Walks mask.data() over mask.totElems() elements and returns how many are true.
// NOTE(review): the declarator line (listing line 33) is absent from this listing, so the
// function header below is incomplete. Presumably it reads
//   YAKL_INLINE int count( FSArray<bool,rank,D0,D1,D2,D3> const &mask ) {
// -- TODO confirm against the original header before relying on this.
32  template <int rank, class D0, class D1, class D2, class D3>
34  int numTrue = 0;
// Every element is visited once through the flat data pointer.
35  for (int i=0; i < mask.totElems(); i++) {
36  if (mask.data()[i]) { numTrue++; }
37  }
38  return numTrue;
39  }
40 
// Serial count() overload for a mask whose dimension parameters D0..D3 are unsigned values.
// Walks mask.data() over mask.totElems() elements and returns how many are true.
// NOTE(review): the declarator line (listing line 42) is absent from this listing, so the
// function header below is incomplete. Presumably it takes a CSArray/SArray
// <bool,rank,D0,D1,D2,D3> const &mask and is marked YAKL_INLINE
// -- TODO confirm against the original header before relying on this.
41  template <int rank, unsigned D0, unsigned D1, unsigned D2, unsigned D3>
43  int numTrue = 0;
// Every element is visited once through the flat data pointer.
44  for (int i=0; i < mask.totElems(); i++) {
45  if (mask.data()[i]) { numTrue++; }
46  }
47  return numTrue;
48  }
49 
50  }
51 }
53 
yakl::FSArray::totElems
static constexpr unsigned totElems()
Get the total number of array elements.
Definition: YAKL_FSArray.h:179
yakl::Stream
Implements the functionality of a stream for parallel kernel execution. If the Stream::create() metho...
Definition: YAKL_streams_events.h:394
yakl::intrinsics::count
int count(Array< bool, rank, memHost, myStyle > const &mask)
Definition: YAKL_intrinsics_count.h:10
yakl::c::parallel_for
void parallel_for(char const *str, Bounds< N, simple > const &bounds, F const &f, LaunchConfig< VecLen, B4B > config=LaunchConfig<>())
[ASYNCHRONOUS] Launch the passed functor in parallel.
__YAKL_NAMESPACE_WRAPPER_END__
#define __YAKL_NAMESPACE_WRAPPER_END__
Definition: YAKL.h:20
__YAKL_NAMESPACE_WRAPPER_BEGIN__
#define __YAKL_NAMESPACE_WRAPPER_BEGIN__
Definition: YAKL.h:19
yakl::DefaultLaunchConfig
LaunchConfig<> DefaultLaunchConfig
This launch configuration sets vector length to the device default and B4B to false.
Definition: YAKL_LaunchConfig.h:77
YAKL_INLINE
#define YAKL_INLINE
Used to decorate functions called from kernels (parallel_for and parallel_outer) or from CPU function...
Definition: YAKL_defines.h:140
yakl::intrinsics::sum
T sum(Array< T, rank, memHost, myStyle > const &arr)
Definition: YAKL_intrinsics_sum.h:10
yakl::yakl_throw
YAKL_INLINE void yakl_throw(const char *msg)
Throw an error message. Works from the host or device.
Definition: YAKL_error.h:17
yakl::Array
This declares the yakl::Array class. Please see the yakl::styleC and yakl::styleFortran template spec...
Definition: YAKL_Array.h:40
yakl::CSArray
C-style array on the stack similar in nature to, e.g., float arr[ny][nx];
Definition: YAKL_CSArray.h:30
yakl
yakl::CSArray::totElems
static constexpr unsigned totElems()
Get the total number of array elements.
Definition: YAKL_CSArray.h:131
yakl::CSArray::data
YAKL_INLINE T * data() const
Get the underlying raw data pointer.
Definition: YAKL_CSArray.h:123
yakl::FSArray::data
YAKL_INLINE T * data() const
Get the underlying raw data pointer.
Definition: YAKL_FSArray.h:171
YAKL_LAMBDA
#define YAKL_LAMBDA
Used to create C++ lambda expressions passed to parallel_for and parallel_outer.
Definition: YAKL_defines.h:128
yakl::FSArray
Fortran-style array on the stack similar in nature to, e.g., float arr[ny][nx];
Definition: YAKL_FSArray.h:53