YAKL
YAKL_Bounds_c.h
Go to the documentation of this file.
1 
7 #pragma once
8 // Included by YAKL_parallel_for_c.h
9 
11 namespace yakl {
12 namespace c {
13 
15  class LBnd {
16  public:
17  int static constexpr default_lbound = 0;
19  int l;
21  int u;
23  int s;
26  this->l = -1;
27  this->u = -1;
28  this->s = -1;
29  }
32  this->l = 0;
33  this->u = u-1;
34  this->s = 1;
35  }
38  this->l = 0;
39  this->u = u-1;
40  this->s = 1;
41  }
43  YAKL_INLINE LBnd(int l, int u) {
44  this->l = l;
45  this->u = u;
46  this->s = 1;
47  #ifdef YAKL_DEBUG
48  if (u < l) yakl_throw("ERROR: cannot specify an upper bound < lower bound");
49  #endif
50  }
52  YAKL_INLINE LBnd(int l, int u, int s) {
53  this->l = l;
54  this->u = u;
55  this->s = s;
56  #ifdef YAKL_DEBUG
57  if (u < l) yakl_throw("ERROR: cannot specify an upper bound < lower bound");
58  if (s < 1) yakl_throw("ERROR: negative strides not yet supported.");
59  #endif
60  }
62  YAKL_INLINE index_t to_scalar() const {
63  return (index_t) u+1;
64  }
66  YAKL_INLINE bool valid() const { return this->s > 0; }
67  };
68 
69 
71  // Bounds: Describes a set of loop bounds
72  // Simple bounds have constexpr lower bounds and strides for greater compiler optimizations
73  // unpackIndices transforms a global index ID into a set of multi-loop indices
75 
// Forward declaration of the primary Bounds template; the definitions are the <N,true> and
// <N,false> partial specializations. `simple` defaults to false, so Bounds<N> names Bounds<N,false>.
84  template <int N, bool simple = false> class Bounds;
85 
86 
87 
97  template<int N> class Bounds<N,true> {
98  public:
100  index_t nIter;
102  index_t dims[N];
107  YAKL_INLINE Bounds( index_t b0 , index_t b1=0 , index_t b2=0 , index_t b3=0 , index_t b4=0 , index_t b5=0 ,
108  index_t b6=0 , index_t b7=0 ) {
109  if constexpr (N >= 1) dims[0] = b0;
110  if constexpr (N >= 2) dims[1] = b1;
111  if constexpr (N >= 3) dims[2] = b2;
112  if constexpr (N >= 4) dims[3] = b3;
113  if constexpr (N >= 5) dims[4] = b4;
114  if constexpr (N >= 6) dims[5] = b5;
115  if constexpr (N >= 7) dims[6] = b6;
116  if constexpr (N >= 8) dims[7] = b7;
117  #ifdef YAKL_DEBUG
118  if (N >= 2) { if (b1 == 0) yakl_throw("ERROR: Too few bounds specified"); }
119  if (N >= 3) { if (b2 == 0) yakl_throw("ERROR: Too few bounds specified"); }
120  if (N >= 4) { if (b3 == 0) yakl_throw("ERROR: Too few bounds specified"); }
121  if (N >= 5) { if (b4 == 0) yakl_throw("ERROR: Too few bounds specified"); }
122  if (N >= 6) { if (b5 == 0) yakl_throw("ERROR: Too few bounds specified"); }
123  if (N >= 7) { if (b6 == 0) yakl_throw("ERROR: Too few bounds specified"); }
124  if (N >= 8) { if (b7 == 0) yakl_throw("ERROR: Too few bounds specified"); }
125  int num_bounds = 1;
126  if (b1 > 0) num_bounds++;
127  if (b2 > 0) num_bounds++;
128  if (b3 > 0) num_bounds++;
129  if (b4 > 0) num_bounds++;
130  if (b5 > 0) num_bounds++;
131  if (b6 > 0) num_bounds++;
132  if (b7 > 0) num_bounds++;
133  if (num_bounds != N) yakl_throw("ERROR: Number of bounds passed does not match templated number of bounds.");
134  #endif
135  nIter = 1;
136  for (int i=0; i<N; i++) { nIter *= dims[i]; }
137  }
139  YAKL_INLINE int lbound(int i) const {
140  #ifdef YAKL_DEBUG
141  if (i < 0 || i > N-1) yakl_throw("ERROR: Calling lbound() on an out of bounds integer");
142  #endif
143  return 0;
144  }
146  YAKL_INLINE int dim (int i) const {
147  #ifdef YAKL_DEBUG
148  if (i < 0 || i > N-1) yakl_throw("ERROR: Calling dim() on an out of bounds integer");
149  #endif
150  return dims[i];
151  }
153  YAKL_INLINE int stride(int i) const {
154  #ifdef YAKL_DEBUG
155  if (i < 0 || i > N-1) yakl_throw("ERROR: Calling stride() on an out of bounds integer");
156  #endif
157  return 1;
158  }
160  YAKL_INLINE void unpackIndices( index_t iGlob , int indices[N] ) const {
161  if constexpr (N == 1) {
162  indices[0] = iGlob;
163  } else if constexpr (N == 2) {
164  indices[0] = iGlob/dims[1];
165  indices[1] = iGlob - dims[1]*indices[0];
166  } else if constexpr (N == 3) {
167  index_t fac, term;
168  fac = dims[1]*dims[2]; indices[0] = iGlob / fac;
169  term = indices[0]*fac; fac = dims[2]; indices[1] = (iGlob - term) / fac;
170  term += indices[1]*fac; indices[2] = iGlob - term;
171  } else if constexpr (N == 4) {
172  index_t fac, term;
173  fac = dims[1]*dims[2]*dims[3]; indices[0] = iGlob / fac;
174  term = indices[0]*fac; fac = dims[2]*dims[3]; indices[1] = (iGlob - term) / fac;
175  term += indices[1]*fac; fac = dims[3]; indices[2] = (iGlob - term) / fac;
176  term += indices[2]*fac; indices[3] = iGlob - term;
177  } else if constexpr (N == 5) {
178  index_t fac, term;
179  fac = dims[1]*dims[2]*dims[3]*dims[4]; indices[0] = iGlob / fac;
180  term = indices[0]*fac; fac = dims[2]*dims[3]*dims[4]; indices[1] = (iGlob - term) / fac;
181  term += indices[1]*fac; fac = dims[3]*dims[4]; indices[2] = (iGlob - term) / fac;
182  term += indices[2]*fac; fac = dims[4]; indices[3] = (iGlob - term) / fac;
183  term += indices[3]*fac; indices[4] = iGlob - term;
184  } else if constexpr (N == 6) {
185  index_t term, fac4=dims[5], fac3=fac4*dims[4], fac2=fac3*dims[3], fac1=fac2*dims[2], fac0=fac1*dims[1];
186  indices[0] = iGlob / fac0;
187  term = indices[0]*fac0; indices[1] = (iGlob - term) / fac1;
188  term += indices[1]*fac1; indices[2] = (iGlob - term) / fac2;
189  term += indices[2]*fac2; indices[3] = (iGlob - term) / fac3;
190  term += indices[3]*fac3; indices[4] = (iGlob - term) / fac4;
191  term += indices[4]*fac4; indices[5] = iGlob - term;
192  } else if constexpr (N == 7) {
193  index_t term, fac5=dims[6], fac4=fac5*dims[5], fac3=fac4*dims[4], fac2=fac3*dims[3], fac1=fac2*dims[2], fac0=fac1*dims[1];
194  indices[0] = iGlob / fac0;
195  term = indices[0]*fac0; indices[1] = (iGlob - term) / fac1;
196  term += indices[1]*fac1; indices[2] = (iGlob - term) / fac2;
197  term += indices[2]*fac2; indices[3] = (iGlob - term) / fac3;
198  term += indices[3]*fac3; indices[4] = (iGlob - term) / fac4;
199  term += indices[4]*fac4; indices[5] = (iGlob - term) / fac5;
200  term += indices[5]*fac5; indices[6] = iGlob - term;
201  } else if constexpr (N == 8) {
202  index_t term, fac6=dims[7], fac5=fac6*dims[6], fac4=fac5*dims[5], fac3=fac4*dims[4], fac2=fac3*dims[3], fac1=fac2*dims[2], fac0=fac1*dims[1];
203  indices[0] = iGlob / fac0;
204  term = indices[0]*fac0; indices[1] = (iGlob - term) / fac1;
205  term += indices[1]*fac1; indices[2] = (iGlob - term) / fac2;
206  term += indices[2]*fac2; indices[3] = (iGlob - term) / fac3;
207  term += indices[3]*fac3; indices[4] = (iGlob - term) / fac4;
208  term += indices[4]*fac4; indices[5] = (iGlob - term) / fac5;
209  term += indices[5]*fac5; indices[6] = (iGlob - term) / fac6;
210  term += indices[6]*fac6; indices[7] = iGlob - term;
211  }
212  }
213  };
214 
215 
216 
226  template<int N> class Bounds<N,false> {
227  public:
229  index_t nIter;
231  int lbounds[N];
233  index_t dims[N];
235  index_t strides[N];
249  YAKL_INLINE Bounds( LBnd const &b0 , LBnd const &b1 = LBnd() , LBnd const &b2 = LBnd() , LBnd const &b3 = LBnd() ,
250  LBnd const &b4 = LBnd() , LBnd const &b5 = LBnd() , LBnd const &b6 = LBnd() ,
251  LBnd const &b7 = LBnd() ) {
252  if constexpr (N >= 1) { lbounds[0] = b0.l; strides[0] = b0.s; dims[0] = ( b0.u - b0.l + 1 ) / b0.s; }
253  if constexpr (N >= 2) { lbounds[1] = b1.l; strides[1] = b1.s; dims[1] = ( b1.u - b1.l + 1 ) / b1.s; }
254  if constexpr (N >= 3) { lbounds[2] = b2.l; strides[2] = b2.s; dims[2] = ( b2.u - b2.l + 1 ) / b2.s; }
255  if constexpr (N >= 4) { lbounds[3] = b3.l; strides[3] = b3.s; dims[3] = ( b3.u - b3.l + 1 ) / b3.s; }
256  if constexpr (N >= 5) { lbounds[4] = b4.l; strides[4] = b4.s; dims[4] = ( b4.u - b4.l + 1 ) / b4.s; }
257  if constexpr (N >= 6) { lbounds[5] = b5.l; strides[5] = b5.s; dims[5] = ( b5.u - b5.l + 1 ) / b5.s; }
258  if constexpr (N >= 7) { lbounds[6] = b6.l; strides[6] = b6.s; dims[6] = ( b6.u - b6.l + 1 ) / b6.s; }
259  if constexpr (N >= 8) { lbounds[7] = b7.l; strides[7] = b7.s; dims[7] = ( b7.u - b7.l + 1 ) / b7.s; }
260  #ifdef YAKL_DEBUG
261  if (N >= 2) { if (! b1.valid()) yakl_throw("ERROR: Too few bounds specified"); }
262  if (N >= 3) { if (! b2.valid()) yakl_throw("ERROR: Too few bounds specified"); }
263  if (N >= 4) { if (! b3.valid()) yakl_throw("ERROR: Too few bounds specified"); }
264  if (N >= 5) { if (! b4.valid()) yakl_throw("ERROR: Too few bounds specified"); }
265  if (N >= 6) { if (! b5.valid()) yakl_throw("ERROR: Too few bounds specified"); }
266  if (N >= 7) { if (! b6.valid()) yakl_throw("ERROR: Too few bounds specified"); }
267  if (N >= 8) { if (! b7.valid()) yakl_throw("ERROR: Too few bounds specified"); }
268  int num_bounds = 1;
269  if (b1.valid()) num_bounds++;
270  if (b2.valid()) num_bounds++;
271  if (b3.valid()) num_bounds++;
272  if (b4.valid()) num_bounds++;
273  if (b5.valid()) num_bounds++;
274  if (b6.valid()) num_bounds++;
275  if (b7.valid()) num_bounds++;
276  if (num_bounds != N) yakl_throw("ERROR: Number of bounds passed does not match templated number of bounds.");
277  #endif
278  nIter = 1;
279  for (int i=0; i<N; i++) { nIter *= dims[i]; }
280  }
282  YAKL_INLINE int lbound(int i) const {
283  #ifdef YAKL_DEBUG
284  if (i < 0 || i > N-1) yakl_throw("ERROR: Calling lbound() on an out of bounds integer");
285  #endif
286  return lbounds[i];
287  }
289  YAKL_INLINE int dim (int i) const {
290  #ifdef YAKL_DEBUG
291  if (i < 0 || i > N-1) yakl_throw("ERROR: Calling dim() on an out of bounds integer");
292  #endif
293  return dims [i];
294  }
296  YAKL_INLINE int stride(int i) const {
297  #ifdef YAKL_DEBUG
298  if (i < 0 || i > N-1) yakl_throw("ERROR: Calling stride() on an out of bounds integer");
299  #endif
300  return strides[i];
301  }
303  YAKL_INLINE void unpackIndices( index_t iGlob , int indices[N] ) const {
304  // Compute base indices
305  if constexpr (N == 1) {
306  indices[0] = iGlob;
307  } else if constexpr (N == 2) {
308  indices[0] = iGlob/dims[1];
309  indices[1] = iGlob - dims[1]*indices[0];
310  } else if constexpr (N == 3) {
311  index_t fac, term;
312  fac = dims[1]*dims[2]; indices[0] = iGlob / fac;
313  term = indices[0]*fac; fac = dims[2]; indices[1] = (iGlob - term) / fac;
314  term += indices[1]*fac; indices[2] = iGlob - term;
315  } else if constexpr (N == 4) {
316  index_t fac, term;
317  fac = dims[1]*dims[2]*dims[3]; indices[0] = iGlob / fac;
318  term = indices[0]*fac; fac = dims[2]*dims[3]; indices[1] = (iGlob - term) / fac;
319  term += indices[1]*fac; fac = dims[3]; indices[2] = (iGlob - term) / fac;
320  term += indices[2]*fac; indices[3] = iGlob - term;
321  } else if constexpr (N == 5) {
322  index_t fac, term;
323  fac = dims[1]*dims[2]*dims[3]*dims[4]; indices[0] = iGlob / fac;
324  term = indices[0]*fac; fac = dims[2]*dims[3]*dims[4]; indices[1] = (iGlob - term) / fac;
325  term += indices[1]*fac; fac = dims[3]*dims[4]; indices[2] = (iGlob - term) / fac;
326  term += indices[2]*fac; fac = dims[4]; indices[3] = (iGlob - term) / fac;
327  term += indices[3]*fac; indices[4] = iGlob - term;
328  } else if constexpr (N == 6) {
329  index_t term, fac4=dims[5], fac3=fac4*dims[4], fac2=fac3*dims[3], fac1=fac2*dims[2], fac0=fac1*dims[1];
330  indices[0] = iGlob / fac0;
331  term = indices[0]*fac0; indices[1] = (iGlob - term) / fac1;
332  term += indices[1]*fac1; indices[2] = (iGlob - term) / fac2;
333  term += indices[2]*fac2; indices[3] = (iGlob - term) / fac3;
334  term += indices[3]*fac3; indices[4] = (iGlob - term) / fac4;
335  term += indices[4]*fac4; indices[5] = iGlob - term;
336  } else if constexpr (N == 7) {
337  index_t term, fac5=dims[6], fac4=fac5*dims[5], fac3=fac4*dims[4], fac2=fac3*dims[3], fac1=fac2*dims[2], fac0=fac1*dims[1];
338  indices[0] = iGlob / fac0;
339  term = indices[0]*fac0; indices[1] = (iGlob - term) / fac1;
340  term += indices[1]*fac1; indices[2] = (iGlob - term) / fac2;
341  term += indices[2]*fac2; indices[3] = (iGlob - term) / fac3;
342  term += indices[3]*fac3; indices[4] = (iGlob - term) / fac4;
343  term += indices[4]*fac4; indices[5] = (iGlob - term) / fac5;
344  term += indices[5]*fac5; indices[6] = iGlob - term;
345  } else if constexpr (N == 8) {
346  index_t term, fac6=dims[7], fac5=fac6*dims[6], fac4=fac5*dims[5], fac3=fac4*dims[4], fac2=fac3*dims[3], fac1=fac2*dims[2], fac0=fac1*dims[1];
347  indices[0] = iGlob / fac0;
348  term = indices[0]*fac0; indices[1] = (iGlob - term) / fac1;
349  term += indices[1]*fac1; indices[2] = (iGlob - term) / fac2;
350  term += indices[2]*fac2; indices[3] = (iGlob - term) / fac3;
351  term += indices[3]*fac3; indices[4] = (iGlob - term) / fac4;
352  term += indices[4]*fac4; indices[5] = (iGlob - term) / fac5;
353  term += indices[5]*fac5; indices[6] = (iGlob - term) / fac6;
354  term += indices[6]*fac6; indices[7] = iGlob - term;
355  }
356 
357  // Apply strides and lower bounds
358  if constexpr (N >= 1) indices[0] = indices[0]*strides[0] + lbounds[0];
359  if constexpr (N >= 2) indices[1] = indices[1]*strides[1] + lbounds[1];
360  if constexpr (N >= 3) indices[2] = indices[2]*strides[2] + lbounds[2];
361  if constexpr (N >= 4) indices[3] = indices[3]*strides[3] + lbounds[3];
362  if constexpr (N >= 5) indices[4] = indices[4]*strides[4] + lbounds[4];
363  if constexpr (N >= 6) indices[5] = indices[5]*strides[5] + lbounds[5];
364  if constexpr (N >= 7) indices[6] = indices[6]*strides[6] + lbounds[6];
365  if constexpr (N >= 8) indices[7] = indices[7]*strides[7] + lbounds[7];
366  }
367  };
368 
// SimpleBounds<N> is shorthand for Bounds<N,true>.
370  template <int N> using SimpleBounds = Bounds<N,true>;
371 }
372 }
374 
375 
yakl::c::Bounds< N, true >::unpackIndices
YAKL_INLINE void unpackIndices(index_t iGlob, int indices[N]) const
Unpack a global index into N loop indices given bounds and strides.
Definition: YAKL_Bounds_c.h:160
yakl::c::LBnd::LBnd
YAKL_INLINE LBnd(index_t u)
Lower bound of zero, stride of one.
Definition: YAKL_Bounds_c.h:31
yakl::c::Bounds< N, false >::Bounds
YAKL_INLINE Bounds(LBnd const &b0, LBnd const &b1=LBnd(), LBnd const &b2=LBnd(), LBnd const &b3=LBnd(), LBnd const &b4=LBnd(), LBnd const &b5=LBnd(), LBnd const &b6=LBnd(), LBnd const &b7=LBnd())
Declares the bounds for each loop for a set of 1 to 8 tightly-nested loops.
Definition: YAKL_Bounds_c.h:249
yakl::c::LBnd::LBnd
YAKL_INLINE LBnd(int u)
Lower bound of zero, stride of one.
Definition: YAKL_Bounds_c.h:37
yakl::c::LBnd::default_lbound
static constexpr int default_lbound
Definition: YAKL_Bounds_c.h:17
yakl::c::LBnd::s
int s
stride
Definition: YAKL_Bounds_c.h:23
yakl::c::Bounds
Describes a set of C-style tightly-nested loops.
Definition: YAKL_Bounds_c.h:84
yakl::c::Bounds< N, true >::dim
YAKL_INLINE int dim(int i) const
Get the total number of iterations for this loop index.
Definition: YAKL_Bounds_c.h:146
yakl::c::Bounds< N, false >::stride
YAKL_INLINE int stride(int i) const
Get the stride for this loop index.
Definition: YAKL_Bounds_c.h:296
yakl::c::Bounds< N, false >::lbound
YAKL_INLINE int lbound(int i) const
Get the lower loop bound for this loop index.
Definition: YAKL_Bounds_c.h:282
__YAKL_NAMESPACE_WRAPPER_END__
#define __YAKL_NAMESPACE_WRAPPER_END__
Definition: YAKL.h:20
yakl::c::Bounds< N, true >::Bounds
YAKL_INLINE Bounds(index_t b0, index_t b1=0, index_t b2=0, index_t b3=0, index_t b4=0, index_t b5=0, index_t b6=0, index_t b7=0)
Declares the total number of iterations for each loop for a set of 1 to 8 tightly-nested loops.
Definition: YAKL_Bounds_c.h:107
__YAKL_NAMESPACE_WRAPPER_BEGIN__
#define __YAKL_NAMESPACE_WRAPPER_BEGIN__
Definition: YAKL.h:19
yakl::c::LBnd::LBnd
YAKL_INLINE LBnd(int l, int u, int s)
Lower bound, upper bound, and stride all specified.
Definition: YAKL_Bounds_c.h:52
YAKL_INLINE
#define YAKL_INLINE
Used to decorate functions called from kernels (parallel_for and parallel_outer) or from CPU function...
Definition: YAKL_defines.h:140
yakl::c::Bounds< N, false >::dim
YAKL_INLINE int dim(int i) const
Get the total number of iterations for this loop index.
Definition: YAKL_Bounds_c.h:289
yakl::c::LBnd
Describes a single C-style loop bound (lower bound default of 0)
Definition: YAKL_Bounds_c.h:15
yakl::c::LBnd::u
int u
upper bound
Definition: YAKL_Bounds_c.h:21
yakl::c::Bounds< N, false >::unpackIndices
YAKL_INLINE void unpackIndices(index_t iGlob, int indices[N]) const
Unpack a global index into N loop indices given bounds and strides.
Definition: YAKL_Bounds_c.h:303
yakl::index_t
unsigned int index_t
Definition: YAKL.h:41
yakl::yakl_throw
YAKL_INLINE void yakl_throw(const char *msg)
Throw an error message. Works from the host or device.
Definition: YAKL_error.h:17
yakl::c::LBnd::LBnd
YAKL_INLINE LBnd(int l, int u)
Lower and upper bounds specified, stride of one.
Definition: YAKL_Bounds_c.h:43
yakl::c::LBnd::l
int l
lower bound
Definition: YAKL_Bounds_c.h:19
yakl::c::LBnd::valid
YAKL_INLINE bool valid() const
Returns whether this loop bound is valid / initialized.
Definition: YAKL_Bounds_c.h:66
yakl::c::Bounds< N, true >::lbound
YAKL_INLINE int lbound(int i) const
Get the lower loop bound for this loop index.
Definition: YAKL_Bounds_c.h:139
yakl::c::LBnd::LBnd
YAKL_INLINE LBnd()
defines an invalid / uninitialized loop bound
Definition: YAKL_Bounds_c.h:25
yakl::c::Bounds< N, true >
Describes a set of C-style tightly-nested loops where all loops have lower bounds of 0 and strides of 1.
Definition: YAKL_Bounds_c.h:97
yakl
yakl::c::Bounds< N, true >::stride
YAKL_INLINE int stride(int i) const
Get the stride for this loop index.
Definition: YAKL_Bounds_c.h:153