YAKL
YAKL_Bounds_fortran.h
Go to the documentation of this file.
1 
7 #pragma once
8 // Included by YAKL_parallel_for_fortran.h
9 
11 namespace yakl {
12 namespace fortran {
13 
15  class LBnd {
16  public:
17  int static constexpr default_lbound = 1;
19  int l;
21  int u;
23  int s;
26  this->l = -1;
27  this->u = -1;
28  this->s = -1;
29  }
32  this->l = 1;
33  this->u = u;
34  this->s = 1;
35  }
37  YAKL_INLINE LBnd(int l, int u) {
38  this->l = l;
39  this->u = u;
40  this->s = 1;
41  #ifdef YAKL_DEBUG
42  if (u < l) yakl_throw("ERROR: cannot specify an upper bound < lower bound");
43  #endif
44  }
46  YAKL_INLINE LBnd(int l, int u, int s) {
47  this->l = l;
48  this->u = u;
49  this->s = s;
50  #ifdef YAKL_DEBUG
51  if (u < l) yakl_throw("ERROR: cannot specify an upper bound < lower bound");
52  if (s < 1) yakl_throw("ERROR: negative strides not yet supported.");
53  #endif
54  }
56  YAKL_INLINE index_t to_scalar() {
57  return (index_t) u;
58  }
60  YAKL_INLINE bool valid() const { return this->s > 0; }
61  };
62 
63 
64 
66  // Bounds: Describes a set of loop bounds
67  // Simple bounds have constexpr lower bounds and strides for greater compiler optimizations
68  // unpackIndices transforms a global index ID into a set of multi-loop indices
70 
79  template <int N, bool simple = false> class Bounds;
80 
81 
82 
92  template<int N> class Bounds<N,true> {
93  public:
95  index_t nIter;
97  index_t dims[N];
102  YAKL_INLINE Bounds( index_t b0 , index_t b1=0 , index_t b2=0 , index_t b3=0 , index_t b4=0 , index_t b5=0 ,
103  index_t b6=0 , index_t b7=0 ) {
104  if constexpr (N >= 1) dims[0] = b0;
105  if constexpr (N >= 2) dims[1] = b1;
106  if constexpr (N >= 3) dims[2] = b2;
107  if constexpr (N >= 4) dims[3] = b3;
108  if constexpr (N >= 5) dims[4] = b4;
109  if constexpr (N >= 6) dims[5] = b5;
110  if constexpr (N >= 7) dims[6] = b6;
111  if constexpr (N >= 8) dims[7] = b7;
112  #ifdef YAKL_DEBUG
113  if (N >= 2) { if (b1 == 0) yakl_throw("ERROR: Too few bounds specified"); }
114  if (N >= 3) { if (b2 == 0) yakl_throw("ERROR: Too few bounds specified"); }
115  if (N >= 4) { if (b3 == 0) yakl_throw("ERROR: Too few bounds specified"); }
116  if (N >= 5) { if (b4 == 0) yakl_throw("ERROR: Too few bounds specified"); }
117  if (N >= 6) { if (b5 == 0) yakl_throw("ERROR: Too few bounds specified"); }
118  if (N >= 7) { if (b6 == 0) yakl_throw("ERROR: Too few bounds specified"); }
119  if (N >= 8) { if (b7 == 0) yakl_throw("ERROR: Too few bounds specified"); }
120  int num_bounds = 1;
121  if (b1 > 0) num_bounds++;
122  if (b2 > 0) num_bounds++;
123  if (b3 > 0) num_bounds++;
124  if (b4 > 0) num_bounds++;
125  if (b5 > 0) num_bounds++;
126  if (b6 > 0) num_bounds++;
127  if (b7 > 0) num_bounds++;
128  if (num_bounds != N) yakl_throw("ERROR: Number of bounds passed does not match templated number of bounds.");
129  #endif
130  nIter = 1;
131  for (int i=0; i<N; i++) { nIter *= dims[i]; }
132  }
134  YAKL_INLINE int lbound(int i) const {
135  #ifdef YAKL_DEBUG
136  if (i < 0 || i > N-1) yakl_throw("ERROR: Calling lbound() on an out of bounds integer");
137  #endif
138  return 1;
139  }
141  YAKL_INLINE int dim (int i) const {
142  #ifdef YAKL_DEBUG
143  if (i < 0 || i > N-1) yakl_throw("ERROR: Calling dim() on an out of bounds integer");
144  #endif
145  return dims[i];
146  }
148  YAKL_INLINE int stride(int i) const {
149  #ifdef YAKL_DEBUG
150  if (i < 0 || i > N-1) yakl_throw("ERROR: Calling stride() on an out of bounds integer");
151  #endif
152  return 1;
153  }
155  YAKL_INLINE void unpackIndices( index_t iGlob , int indices[N] ) const {
156  if constexpr (N == 1) {
157  indices[0] = iGlob;
158  } else if constexpr (N == 2) {
159  indices[0] = iGlob/dims[1] ;
160  indices[1] = iGlob - dims[1]*indices[0];
161  } else if constexpr (N == 3) {
162  index_t fac, term;
163  fac = dims[1]*dims[2]; indices[0] = iGlob / fac;
164  term = indices[0]*fac; fac = dims[2]; indices[1] = (iGlob - term) / fac;
165  term += indices[1]*fac; indices[2] = iGlob - term ;
166  } else if constexpr (N == 4) {
167  index_t fac, term;
168  fac = dims[1]*dims[2]*dims[3]; indices[0] = iGlob / fac;
169  term = indices[0]*fac; fac = dims[2]*dims[3]; indices[1] = (iGlob - term) / fac;
170  term += indices[1]*fac; fac = dims[3]; indices[2] = (iGlob - term) / fac;
171  term += indices[2]*fac; indices[3] = iGlob - term ;
172  } else if constexpr (N == 5) {
173  index_t fac, term;
174  fac = dims[1]*dims[2]*dims[3]*dims[4]; indices[0] = iGlob / fac;
175  term = indices[0]*fac; fac = dims[2]*dims[3]*dims[4]; indices[1] = (iGlob - term) / fac;
176  term += indices[1]*fac; fac = dims[3]*dims[4]; indices[2] = (iGlob - term) / fac;
177  term += indices[2]*fac; fac = dims[4]; indices[3] = (iGlob - term) / fac;
178  term += indices[3]*fac; indices[4] = iGlob - term ;
179  } else if constexpr (N == 6) {
180  index_t term, fac4=dims[5], fac3=fac4*dims[4], fac2=fac3*dims[3], fac1=fac2*dims[2], fac0=fac1*dims[1];
181  indices[0] = iGlob / fac0;
182  term = indices[0]*fac0; indices[1] = (iGlob - term) / fac1;
183  term += indices[1]*fac1; indices[2] = (iGlob - term) / fac2;
184  term += indices[2]*fac2; indices[3] = (iGlob - term) / fac3;
185  term += indices[3]*fac3; indices[4] = (iGlob - term) / fac4;
186  term += indices[4]*fac4; indices[5] = iGlob - term ;
187  } else if constexpr (N == 7) {
188  index_t term, fac5=dims[6], fac4=fac5*dims[5], fac3=fac4*dims[4], fac2=fac3*dims[3], fac1=fac2*dims[2], fac0=fac1*dims[1];
189  indices[0] = iGlob / fac0;
190  term = indices[0]*fac0; indices[1] = (iGlob - term) / fac1;
191  term += indices[1]*fac1; indices[2] = (iGlob - term) / fac2;
192  term += indices[2]*fac2; indices[3] = (iGlob - term) / fac3;
193  term += indices[3]*fac3; indices[4] = (iGlob - term) / fac4;
194  term += indices[4]*fac4; indices[5] = (iGlob - term) / fac5;
195  term += indices[5]*fac5; indices[6] = iGlob - term ;
196  } else if constexpr (N == 8) {
197  index_t term, fac6=dims[7], fac5=fac6*dims[6], fac4=fac5*dims[5], fac3=fac4*dims[4], fac2=fac3*dims[3], fac1=fac2*dims[2], fac0=fac1*dims[1];
198  indices[0] = iGlob / fac0;
199  term = indices[0]*fac0; indices[1] = (iGlob - term) / fac1;
200  term += indices[1]*fac1; indices[2] = (iGlob - term) / fac2;
201  term += indices[2]*fac2; indices[3] = (iGlob - term) / fac3;
202  term += indices[3]*fac3; indices[4] = (iGlob - term) / fac4;
203  term += indices[4]*fac4; indices[5] = (iGlob - term) / fac5;
204  term += indices[5]*fac5; indices[6] = (iGlob - term) / fac6;
205  term += indices[6]*fac6; indices[7] = iGlob - term ;
206  }
207  if constexpr (N >= 1) indices[0]++;
208  if constexpr (N >= 2) indices[1]++;
209  if constexpr (N >= 3) indices[2]++;
210  if constexpr (N >= 4) indices[3]++;
211  if constexpr (N >= 5) indices[4]++;
212  if constexpr (N >= 6) indices[5]++;
213  if constexpr (N >= 7) indices[6]++;
214  if constexpr (N >= 8) indices[7]++;
215  }
216  };
217 
218 
219 
229  template<int N> class Bounds<N,false> {
230  public:
232  index_t nIter;
234  int lbounds[N];
236  index_t dims[N];
238  index_t strides[N];
252  YAKL_INLINE Bounds( LBnd const &b0 , LBnd const &b1 = LBnd() , LBnd const &b2 = LBnd() , LBnd const &b3 = LBnd() ,
253  LBnd const &b4 = LBnd() , LBnd const &b5 = LBnd() , LBnd const &b6 = LBnd() ,
254  LBnd const &b7 = LBnd() ) {
255  if constexpr (N >= 1) { lbounds[0] = b0.l; strides[0] = b0.s; dims[0] = ( b0.u - b0.l + 1 ) / b0.s; }
256  if constexpr (N >= 2) { lbounds[1] = b1.l; strides[1] = b1.s; dims[1] = ( b1.u - b1.l + 1 ) / b1.s; }
257  if constexpr (N >= 3) { lbounds[2] = b2.l; strides[2] = b2.s; dims[2] = ( b2.u - b2.l + 1 ) / b2.s; }
258  if constexpr (N >= 4) { lbounds[3] = b3.l; strides[3] = b3.s; dims[3] = ( b3.u - b3.l + 1 ) / b3.s; }
259  if constexpr (N >= 5) { lbounds[4] = b4.l; strides[4] = b4.s; dims[4] = ( b4.u - b4.l + 1 ) / b4.s; }
260  if constexpr (N >= 6) { lbounds[5] = b5.l; strides[5] = b5.s; dims[5] = ( b5.u - b5.l + 1 ) / b5.s; }
261  if constexpr (N >= 7) { lbounds[6] = b6.l; strides[6] = b6.s; dims[6] = ( b6.u - b6.l + 1 ) / b6.s; }
262  if constexpr (N >= 8) { lbounds[7] = b7.l; strides[7] = b7.s; dims[7] = ( b7.u - b7.l + 1 ) / b7.s; }
263  #ifdef YAKL_DEBUG
264  if (N >= 2) { if (! b1.valid()) yakl_throw("ERROR: Too few bounds specified"); }
265  if (N >= 3) { if (! b2.valid()) yakl_throw("ERROR: Too few bounds specified"); }
266  if (N >= 4) { if (! b3.valid()) yakl_throw("ERROR: Too few bounds specified"); }
267  if (N >= 5) { if (! b4.valid()) yakl_throw("ERROR: Too few bounds specified"); }
268  if (N >= 6) { if (! b5.valid()) yakl_throw("ERROR: Too few bounds specified"); }
269  if (N >= 7) { if (! b6.valid()) yakl_throw("ERROR: Too few bounds specified"); }
270  if (N >= 8) { if (! b7.valid()) yakl_throw("ERROR: Too few bounds specified"); }
271  int num_bounds = 1;
272  if (b1.valid()) num_bounds++;
273  if (b2.valid()) num_bounds++;
274  if (b3.valid()) num_bounds++;
275  if (b4.valid()) num_bounds++;
276  if (b5.valid()) num_bounds++;
277  if (b6.valid()) num_bounds++;
278  if (b7.valid()) num_bounds++;
279  if (num_bounds != N) yakl_throw("ERROR: Number of bounds passed does not match templated number of bounds.");
280  #endif
281  nIter = 1;
282  for (int i=0; i<N; i++) { nIter *= dims[i]; }
283  }
285  YAKL_INLINE int lbound(int i) const {
286  #ifdef YAKL_DEBUG
287  if (i < 0 || i > N-1) yakl_throw("ERROR: Calling lbound() on an out of bounds integer");
288  #endif
289  return lbounds[i];
290  }
292  YAKL_INLINE int dim (int i) const {
293  #ifdef YAKL_DEBUG
294  if (i < 0 || i > N-1) yakl_throw("ERROR: Calling dim() on an out of bounds integer");
295  #endif
296  return dims [i];
297  }
299  YAKL_INLINE int stride(int i) const {
300  #ifdef YAKL_DEBUG
301  if (i < 0 || i > N-1) yakl_throw("ERROR: Calling stride() on an out of bounds integer");
302  #endif
303  return strides[i];
304  }
306  YAKL_INLINE void unpackIndices( index_t iGlob , int indices[N] ) const {
307  // Compute base indices
308  if constexpr (N == 1) {
309  indices[0] = iGlob;
310  } else if constexpr (N == 2) {
311  indices[0] = iGlob/dims[1] ;
312  indices[1] = iGlob - dims[1]*indices[0];
313  } else if constexpr (N == 3) {
314  index_t fac, term;
315  fac = dims[1]*dims[2]; indices[0] = iGlob / fac;
316  term = indices[0]*fac; fac = dims[2]; indices[1] = (iGlob - term) / fac;
317  term += indices[1]*fac; indices[2] = iGlob - term ;
318  } else if constexpr (N == 4) {
319  index_t fac, term;
320  fac = dims[1]*dims[2]*dims[3]; indices[0] = iGlob / fac;
321  term = indices[0]*fac; fac = dims[2]*dims[3]; indices[1] = (iGlob - term) / fac;
322  term += indices[1]*fac; fac = dims[3]; indices[2] = (iGlob - term) / fac;
323  term += indices[2]*fac; indices[3] = iGlob - term ;
324  } else if constexpr (N == 5) {
325  index_t fac, term;
326  fac = dims[1]*dims[2]*dims[3]*dims[4]; indices[0] = iGlob / fac;
327  term = indices[0]*fac; fac = dims[2]*dims[3]*dims[4]; indices[1] = (iGlob - term) / fac;
328  term += indices[1]*fac; fac = dims[3]*dims[4]; indices[2] = (iGlob - term) / fac;
329  term += indices[2]*fac; fac = dims[4]; indices[3] = (iGlob - term) / fac;
330  term += indices[3]*fac; indices[4] = iGlob - term ;
331  } else if constexpr (N == 6) {
332  index_t term, fac4=dims[5], fac3=fac4*dims[4], fac2=fac3*dims[3], fac1=fac2*dims[2], fac0=fac1*dims[1];
333  indices[0] = iGlob / fac0;
334  term = indices[0]*fac0; indices[1] = (iGlob - term) / fac1;
335  term += indices[1]*fac1; indices[2] = (iGlob - term) / fac2;
336  term += indices[2]*fac2; indices[3] = (iGlob - term) / fac3;
337  term += indices[3]*fac3; indices[4] = (iGlob - term) / fac4;
338  term += indices[4]*fac4; indices[5] = iGlob - term ;
339  } else if constexpr (N == 7) {
340  index_t term, fac5=dims[6], fac4=fac5*dims[5], fac3=fac4*dims[4], fac2=fac3*dims[3], fac1=fac2*dims[2], fac0=fac1*dims[1];
341  indices[0] = iGlob / fac0;
342  term = indices[0]*fac0; indices[1] = (iGlob - term) / fac1;
343  term += indices[1]*fac1; indices[2] = (iGlob - term) / fac2;
344  term += indices[2]*fac2; indices[3] = (iGlob - term) / fac3;
345  term += indices[3]*fac3; indices[4] = (iGlob - term) / fac4;
346  term += indices[4]*fac4; indices[5] = (iGlob - term) / fac5;
347  term += indices[5]*fac5; indices[6] = iGlob - term ;
348  } else if constexpr (N == 8) {
349  index_t term, fac6=dims[7], fac5=fac6*dims[6], fac4=fac5*dims[5], fac3=fac4*dims[4], fac2=fac3*dims[3], fac1=fac2*dims[2], fac0=fac1*dims[1];
350  indices[0] = iGlob / fac0;
351  term = indices[0]*fac0; indices[1] = (iGlob - term) / fac1;
352  term += indices[1]*fac1; indices[2] = (iGlob - term) / fac2;
353  term += indices[2]*fac2; indices[3] = (iGlob - term) / fac3;
354  term += indices[3]*fac3; indices[4] = (iGlob - term) / fac4;
355  term += indices[4]*fac4; indices[5] = (iGlob - term) / fac5;
356  term += indices[5]*fac5; indices[6] = (iGlob - term) / fac6;
357  term += indices[6]*fac6; indices[7] = iGlob - term ;
358  }
359 
360  // Apply strides and lower bounds
361  if constexpr (N >= 1) indices[0] = indices[0]*strides[0] + lbounds[0];
362  if constexpr (N >= 2) indices[1] = indices[1]*strides[1] + lbounds[1];
363  if constexpr (N >= 3) indices[2] = indices[2]*strides[2] + lbounds[2];
364  if constexpr (N >= 4) indices[3] = indices[3]*strides[3] + lbounds[3];
365  if constexpr (N >= 5) indices[4] = indices[4]*strides[4] + lbounds[4];
366  if constexpr (N >= 6) indices[5] = indices[5]*strides[5] + lbounds[5];
367  if constexpr (N >= 7) indices[6] = indices[6]*strides[6] + lbounds[6];
368  if constexpr (N >= 8) indices[7] = indices[7]*strides[7] + lbounds[7];
369  }
370  };
371 
373  template <int N> using SimpleBounds = Bounds<N,true>;
374 
375 }
376 }
378 
379 
yakl::fortran::Bounds< N, true >::unpackIndices
YAKL_INLINE void unpackIndices(index_t iGlob, int indices[N]) const
Unpack a global index into N loop indices given bounds and strides.
Definition: YAKL_Bounds_fortran.h:155
yakl::fortran::LBnd::u
int u
upper bound
Definition: YAKL_Bounds_fortran.h:21
yakl::fortran::LBnd::LBnd
YAKL_INLINE LBnd(int l, int u, int s)
Lower bound, upper bound, and stride all specified.
Definition: YAKL_Bounds_fortran.h:46
yakl::fortran::Bounds< N, true >::dim
YAKL_INLINE int dim(int i) const
Get the total number of iterations for this loop index.
Definition: YAKL_Bounds_fortran.h:141
yakl::fortran::Bounds< N, true >::lbound
YAKL_INLINE int lbound(int i) const
Get the lower loop bound for this loop index.
Definition: YAKL_Bounds_fortran.h:134
yakl::fortran::LBnd::LBnd
YAKL_INLINE LBnd()
defines an invalid / uninitialized loop bound
Definition: YAKL_Bounds_fortran.h:25
yakl::fortran::LBnd::default_lbound
static constexpr int default_lbound
Definition: YAKL_Bounds_fortran.h:17
yakl::fortran::LBnd::s
int s
stride
Definition: YAKL_Bounds_fortran.h:23
yakl::fortran::Bounds< N, true >::Bounds
YAKL_INLINE Bounds(index_t b0, index_t b1=0, index_t b2=0, index_t b3=0, index_t b4=0, index_t b5=0, index_t b6=0, index_t b7=0)
Declares the total number of iterations for each loop for a set of 1 to 8 tightly-nested loops.
Definition: YAKL_Bounds_fortran.h:102
yakl::fortran::LBnd::valid
YAKL_INLINE bool valid() const
Returns whether this loop bound is valid / initialized.
Definition: YAKL_Bounds_fortran.h:60
yakl::fortran::LBnd::LBnd
YAKL_INLINE LBnd(int u)
Lower bound of one, stride of one.
Definition: YAKL_Bounds_fortran.h:31
yakl::fortran::Bounds< N, true >
Describes a set of Fortran-style tightly-nested loops where all loops have lower bounds of 1 strides ...
Definition: YAKL_Bounds_fortran.h:92
yakl::fortran::Bounds< N, true >::stride
YAKL_INLINE int stride(int i) const
Get the stride for this loop index.
Definition: YAKL_Bounds_fortran.h:148
__YAKL_NAMESPACE_WRAPPER_END__
#define __YAKL_NAMESPACE_WRAPPER_END__
Definition: YAKL.h:20
yakl::fortran::Bounds< N, false >::stride
YAKL_INLINE int stride(int i) const
Get the stride for this loop index.
Definition: YAKL_Bounds_fortran.h:299
__YAKL_NAMESPACE_WRAPPER_BEGIN__
#define __YAKL_NAMESPACE_WRAPPER_BEGIN__
Definition: YAKL.h:19
yakl::fortran::Bounds< N, false >::unpackIndices
YAKL_INLINE void unpackIndices(index_t iGlob, int indices[N]) const
Unpack a global index into N loop indices given bounds and strides.
Definition: YAKL_Bounds_fortran.h:306
YAKL_INLINE
#define YAKL_INLINE
Used to decorate functions called from kernels (parallel_for and parallel_outer) or from CPU function...
Definition: YAKL_defines.h:140
yakl::fortran::Bounds< N, false >::lbound
YAKL_INLINE int lbound(int i) const
Get the lower loop bound for this loop index.
Definition: YAKL_Bounds_fortran.h:285
yakl::fortran::Bounds< N, false >::dim
YAKL_INLINE int dim(int i) const
Get the total number of iterations for this loop index.
Definition: YAKL_Bounds_fortran.h:292
yakl::index_t
unsigned int index_t
Definition: YAKL.h:41
yakl::yakl_throw
YAKL_INLINE void yakl_throw(const char *msg)
Throw an error message. Works from the host or device.
Definition: YAKL_error.h:17
yakl::fortran::Bounds< N, false >::Bounds
YAKL_INLINE Bounds(LBnd const &b0, LBnd const &b1=LBnd(), LBnd const &b2=LBnd(), LBnd const &b3=LBnd(), LBnd const &b4=LBnd(), LBnd const &b5=LBnd(), LBnd const &b6=LBnd(), LBnd const &b7=LBnd())
Declares the bounds for each loop for a set of 1 to 8 tightly-nested loops.
Definition: YAKL_Bounds_fortran.h:252
yakl::fortran::Bounds
Describes a set of Fortran-style tightly-nested loops.
Definition: YAKL_Bounds_fortran.h:79
yakl
yakl::fortran::LBnd::l
int l
lower bound
Definition: YAKL_Bounds_fortran.h:19
yakl::fortran::LBnd
Describes a single Fortran-style loop bound (lower bound default of 1)
Definition: YAKL_Bounds_fortran.h:15
yakl::fortran::LBnd::LBnd
YAKL_INLINE LBnd(int l, int u)
Lower and upper bounds specified, stride of one.
Definition: YAKL_Bounds_fortran.h:37