YAKL
YAKL_pentadiagonal.h
Go to the documentation of this file.
1 
2 #pragma once
3 
4 #include "YAKL.h"
5 
6 
8 namespace yakl {
9 
10 
11 
13  template <unsigned int n, class real>
14  YAKL_INLINE real penta_sum(SArray<real,1,n> const &v, SArray<real,1,n> const &z) {
15  real sum = 0;
16  for (int k=0; k < n; k++) {
17  sum += v(k)*z(k);
18  }
19  return sum;
20  }
21 
22 
23 
25  template <unsigned int n, class real>
26  YAKL_INLINE void matrix_inverse_small(SArray<real,2,n,n> &a) {
27  SArray<real,2,n,n> inv;
28 
29  // Initialize inverse as identity
30  for (int icol = 0; icol < n; icol++) {
31  for (int irow = 0; irow < n; irow++) {
32  if (icol == irow) {
33  inv(icol,irow) = 1;
34  } else {
35  inv(icol,irow) = 0;
36  }
37  }
38  }
39 
40  // Gaussian elimination to zero out lower
41  for (int idiag = 0; idiag < n; idiag++) {
42  // Divide out the diagonal component from the first row
43  real factor = static_cast<real>(1)/a(idiag,idiag);
44  for (int icol = idiag; icol < n; icol++) {
45  a(icol,idiag) *= factor;
46  }
47  for (int icol = 0; icol < n; icol++) {
48  inv(icol,idiag) *= factor;
49  }
50  for (int irow = idiag+1; irow < n; irow++) {
51  real factor = a(idiag,irow);
52  for (int icol = idiag; icol < n; icol++) {
53  a (icol,irow) -= factor * a (icol,idiag);
54  }
55  for (int icol = 0; icol < n; icol++) {
56  inv(icol,irow) -= factor * inv(icol,idiag);
57  }
58  }
59  }
60 
61  // Gaussian elimination to zero out upper
62  for (int idiag = n-1; idiag >= 1; idiag--) {
63  for (int irow = 0; irow < idiag; irow++) {
64  real factor = a(idiag,irow);
65  for (int icol = irow+1; icol < n; icol++) {
66  a (icol,irow) -= factor * a (icol,idiag);
67  }
68  for (int icol = 0; icol < n; icol++) {
69  inv(icol,irow) -= factor * inv(icol,idiag);
70  }
71  }
72  }
73 
74  // Overwrite matrix with inverse
75  for (int icol = 0; icol < n; icol++) {
76  for (int irow = 0; irow < n; irow++) {
77  a(icol,irow) = inv(icol,irow);
78  }
79  }
80  }
81 
82 
83 
95  template <unsigned int n, class real>
97  SArray<real,1,n> const &b,
98  SArray<real,1,n> const &c,
99  SArray<real,1,n> const &d,
100  SArray<real,1,n> const &e,
101  SArray<real,1,n> const &f,
102  SArray<real,1,n> &u) {
103 
104  // in its clearest incarnation, this algorithm uses three storage arrays
105  // called p, q and r. here, the solution vector u is used for r, cutting
106  // the extra storage down to two arrays.
107  SArray<real,1,n> p, q;
108  real bet, den;
109 
110  // initialize elimination and backsubstitution arrays
111  bet = static_cast<real>(1)/c(0);
112  p(0) = -d(0) * bet;
113  q(0) = -e(0) * bet;
114  u(0) = f(0) * bet;
115 
116  bet = c(1) + b(1)*p(0);
117  bet = -static_cast<real>(1)/bet;
118  p(1) = (d(1) + b(1)*q(0)) * bet;
119  q(1) = e(1) * bet;
120  u(1) = (b(1)*u(0) - f(1)) * bet;
121 
122  // reduce to upper triangular
123  for (int i=2; i < n; i++) {
124  bet = b(i) + a(i) * p(i-2);
125  den = c(i) + a(i)*q(i-2) + bet*p(i-1);
126  den = -static_cast<real>(1)/den;
127  p(i) = (d(i) + bet*q(i-1)) * den;
128  q(i) = e(i) * den;
129  u(i) = (a(i)*u(i-2) + bet*u(i-1) - f(i)) * den;
130  }
131 
132  // backsubstitution
133  u(n-2) = u(n-2) + p(n-2) * u(n-1);
134  for (int i=n-3; i >= 0; i--) {
135  u(i) = u(i) + p(i) * u(i+1) + q(i) * u(i+2);
136  }
137  }
138 
139 
140 
152  template <unsigned int n, class real>
154  SArray<real,1,n> const &b,
155  SArray<real,1,n> const &c,
156  SArray<real,1,n> const &d,
157  SArray<real,1,n> const &e,
158  SArray<real,1,n> const &f,
159  SArray<real,1,n> &x) {
160 
161  SArray<real,1,n> u1, u2, u3, u4, v1, v2, v3, v4, z1, z2, z3, z4, r, s, y;
162  SArray<real,2,4,4> h, p;
163 
164  real cp1 = a(0);
165  real cp2 = b(0);
166  real cp3 = a(1);
167  real cp4 = e(n-2);
168  real cp5 = d(n-1);
169  real cp6 = e(n-1);
170 
171  for (int i=0; i < n; i++) {
172  u1(i) = 0; u2(i) = 0; u3(i) = 0; u4(i) = 0;
173  v1(i) = 0; v2(i) = 0; v3(i) = 0; v4(i) = 0;
174  z1(i) = 0; z2(i) = 0; z3(i) = 0; z4(i) = 0;
175  }
176 
177  u1(0 ) = 1;
178  u2(1 ) = 1;
179  u3(n-2) = 1;
180  u4(n-1) = 1;
181 
182  v1(n-2) = cp1;
183  v1(n-1) = cp2;
184  v2(n-1) = cp3;
185  v3(0 ) = cp4;
186  v4(0 ) = cp5;
187  v4(1 ) = cp6;
188 
189  pentadiagonal(a,b,c,d,e,u1,z1);
190  pentadiagonal(a,b,c,d,e,u2,z2);
191  pentadiagonal(a,b,c,d,e,u3,z3);
192  pentadiagonal(a,b,c,d,e,u4,z4);
193  pentadiagonal(a,b,c,d,e,f ,y );
194 
195  p(0,0) = penta_sum(v1,z1);
196  p(0,1) = penta_sum(v1,z2);
197  p(0,2) = penta_sum(v1,z3);
198  p(0,3) = penta_sum(v1,z4);
199 
200  p(1,0) = penta_sum(v2,z1);
201  p(1,1) = penta_sum(v2,z2);
202  p(1,2) = penta_sum(v2,z3);
203  p(1,3) = penta_sum(v2,z4);
204 
205  p(2,0) = penta_sum(v3,z1);
206  p(2,1) = penta_sum(v3,z2);
207  p(2,2) = penta_sum(v3,z3);
208  p(2,3) = penta_sum(v3,z4);
209 
210  p(3,0) = penta_sum(v4,z1);
211  p(3,1) = penta_sum(v4,z2);
212  p(3,2) = penta_sum(v4,z3);
213  p(3,3) = penta_sum(v4,z4);
214 
215  for (int i=0; i < 4; i++) {
216  p(i,i) = p(i,i) + 1;
217  }
218 
219  matrix_inverse_small(p);
220 
221  r(0) = 0;
222  r(1) = 0;
223  r(2) = 0;
224  r(3) = 0;
225  for (int k=0; k < n; k++) {
226  r(0) = r(0) + v1(k) * y(k);
227  r(1) = r(1) + v2(k) * y(k);
228  r(2) = r(2) + v3(k) * y(k);
229  r(3) = r(3) + v4(k) * y(k);
230  }
231 
232  for (int j=0; j < 4; j++) {
233  s(j) = 0;
234  for (int k=0; k < 4; k++) {
235  s(j) = s(j) + p(j,k) * r(k);
236  }
237  }
238 
239  for (int j=0; j < n; j++) {
240  real sum = z1(j)*s(0) + z2(j)*s(1) + z3(j)*s(2) + z4(j)*s(3);
241  x(j) = y(j) - sum;
242  }
243  }
244 
245 }
247 
248 
YAKL.h
yakl::pentadiagonal
YAKL_INLINE void pentadiagonal(SArray< real, 1, n > const &a, SArray< real, 1, n > const &b, SArray< real, 1, n > const &c, SArray< real, 1, n > const &d, SArray< real, 1, n > const &e, SArray< real, 1, n > const &f, SArray< real, 1, n > &u)
Performs a small non-periodic pentadiagonal solve. Click for more details.
Definition: YAKL_pentadiagonal.h:96
__YAKL_NAMESPACE_WRAPPER_END__
#define __YAKL_NAMESPACE_WRAPPER_END__
Definition: YAKL.h:20
__YAKL_NAMESPACE_WRAPPER_BEGIN__
#define __YAKL_NAMESPACE_WRAPPER_BEGIN__
Definition: YAKL.h:19
YAKL_INLINE
#define YAKL_INLINE
Used to decorate functions called from kernels (parallel_for and parallel_outer) or from CPU function...
Definition: YAKL_defines.h:140
yakl::intrinsics::sum
T sum(Array< T, rank, memHost, myStyle > const &arr)
Definition: YAKL_intrinsics_sum.h:10
yakl::CSArray
C-style array on the stack similar in nature to, e.g., float arr[ny][nx];
Definition: YAKL_CSArray.h:30
yakl::pentadiagonal_periodic
YAKL_INLINE void pentadiagonal_periodic(SArray< real, 1, n > const &a, SArray< real, 1, n > const &b, SArray< real, 1, n > const &c, SArray< real, 1, n > const &d, SArray< real, 1, n > const &e, SArray< real, 1, n > const &f, SArray< real, 1, n > &x)
Performs a small periodic pentadiagonal solve. Click for more details.
Definition: YAKL_pentadiagonal.h:153
yakl