13 template <
unsigned int n,
class real>
14 YAKL_INLINE real penta_sum(SArray<real,1,n>
const &v, SArray<real,1,n>
const &z) {
16 for (
int k=0; k < n; k++) {
25 template <
unsigned int n,
class real>
26 YAKL_INLINE void matrix_inverse_small(SArray<real,2,n,n> &a) {
27 SArray<real,2,n,n> inv;
30 for (
int icol = 0; icol < n; icol++) {
31 for (
int irow = 0; irow < n; irow++) {
41 for (
int idiag = 0; idiag < n; idiag++) {
43 real factor =
static_cast<real
>(1)/a(idiag,idiag);
44 for (
int icol = idiag; icol < n; icol++) {
45 a(icol,idiag) *= factor;
47 for (
int icol = 0; icol < n; icol++) {
48 inv(icol,idiag) *= factor;
50 for (
int irow = idiag+1; irow < n; irow++) {
51 real factor = a(idiag,irow);
52 for (
int icol = idiag; icol < n; icol++) {
53 a (icol,irow) -= factor * a (icol,idiag);
55 for (
int icol = 0; icol < n; icol++) {
56 inv(icol,irow) -= factor * inv(icol,idiag);
62 for (
int idiag = n-1; idiag >= 1; idiag--) {
63 for (
int irow = 0; irow < idiag; irow++) {
64 real factor = a(idiag,irow);
65 for (
int icol = irow+1; icol < n; icol++) {
66 a (icol,irow) -= factor * a (icol,idiag);
68 for (
int icol = 0; icol < n; icol++) {
69 inv(icol,irow) -= factor * inv(icol,idiag);
75 for (
int icol = 0; icol < n; icol++) {
76 for (
int irow = 0; irow < n; irow++) {
77 a(icol,irow) = inv(icol,irow);
95 template <
unsigned int n,
class real>
111 bet =
static_cast<real
>(1)/c(0);
116 bet = c(1) + b(1)*p(0);
117 bet = -
static_cast<real
>(1)/bet;
118 p(1) = (d(1) + b(1)*q(0)) * bet;
120 u(1) = (b(1)*u(0) - f(1)) * bet;
123 for (
int i=2; i < n; i++) {
124 bet = b(i) + a(i) * p(i-2);
125 den = c(i) + a(i)*q(i-2) + bet*p(i-1);
126 den = -
static_cast<real
>(1)/den;
127 p(i) = (d(i) + bet*q(i-1)) * den;
129 u(i) = (a(i)*u(i-2) + bet*u(i-1) - f(i)) * den;
133 u(n-2) = u(n-2) + p(n-2) * u(n-1);
134 for (
int i=n-3; i >= 0; i--) {
135 u(i) = u(i) + p(i) * u(i+1) + q(i) * u(i+2);
152 template <
unsigned int n,
class real>
161 SArray<real,1,n> u1, u2, u3, u4, v1, v2, v3, v4, z1, z2, z3, z4, r, s, y;
171 for (
int i=0; i < n; i++) {
172 u1(i) = 0; u2(i) = 0; u3(i) = 0; u4(i) = 0;
173 v1(i) = 0; v2(i) = 0; v3(i) = 0; v4(i) = 0;
174 z1(i) = 0; z2(i) = 0; z3(i) = 0; z4(i) = 0;
195 p(0,0) = penta_sum(v1,z1);
196 p(0,1) = penta_sum(v1,z2);
197 p(0,2) = penta_sum(v1,z3);
198 p(0,3) = penta_sum(v1,z4);
200 p(1,0) = penta_sum(v2,z1);
201 p(1,1) = penta_sum(v2,z2);
202 p(1,2) = penta_sum(v2,z3);
203 p(1,3) = penta_sum(v2,z4);
205 p(2,0) = penta_sum(v3,z1);
206 p(2,1) = penta_sum(v3,z2);
207 p(2,2) = penta_sum(v3,z3);
208 p(2,3) = penta_sum(v3,z4);
210 p(3,0) = penta_sum(v4,z1);
211 p(3,1) = penta_sum(v4,z2);
212 p(3,2) = penta_sum(v4,z3);
213 p(3,3) = penta_sum(v4,z4);
215 for (
int i=0; i < 4; i++) {
219 matrix_inverse_small(p);
225 for (
int k=0; k < n; k++) {
226 r(0) = r(0) + v1(k) * y(k);
227 r(1) = r(1) + v2(k) * y(k);
228 r(2) = r(2) + v3(k) * y(k);
229 r(3) = r(3) + v4(k) * y(k);
232 for (
int j=0; j < 4; j++) {
234 for (
int k=0; k < 4; k++) {
235 s(j) = s(j) + p(j,k) * r(k);
239 for (
int j=0; j < n; j++) {
240 real
sum = z1(j)*s(0) + z2(j)*s(1) + z3(j)*s(2) + z4(j)*s(3);