13   template <
unsigned int n, 
class real>
 
   14   YAKL_INLINE real penta_sum(SArray<real,1,n> 
const &v, SArray<real,1,n> 
const &z) {
 
   16     for (
int k=0; k < n; k++) {
 
   25   template <
unsigned int n, 
class real>
 
   26   YAKL_INLINE void matrix_inverse_small(SArray<real,2,n,n> &a) {
 
   27     SArray<real,2,n,n> inv;
 
   30     for (
int icol = 0; icol < n; icol++) {
 
   31       for (
int irow = 0; irow < n; irow++) {
 
   41     for (
int idiag = 0; idiag < n; idiag++) {
 
   43       real factor = 
static_cast<real
>(1)/a(idiag,idiag);
 
   44       for (
int icol = idiag; icol < n; icol++) {
 
   45         a(icol,idiag) *= factor;
 
   47       for (
int icol = 0; icol < n; icol++) {
 
   48         inv(icol,idiag) *= factor;
 
   50       for (
int irow = idiag+1; irow < n; irow++) {
 
   51         real factor = a(idiag,irow);
 
   52         for (
int icol = idiag; icol < n; icol++) {
 
   53           a  (icol,irow) -= factor * a  (icol,idiag);
 
   55         for (
int icol = 0; icol < n; icol++) {
 
   56           inv(icol,irow) -= factor * inv(icol,idiag);
 
   62     for (
int idiag = n-1; idiag >= 1; idiag--) {
 
   63       for (
int irow = 0; irow < idiag; irow++) {
 
   64         real factor = a(idiag,irow);
 
   65         for (
int icol = irow+1; icol < n; icol++) {
 
   66           a  (icol,irow) -= factor * a  (icol,idiag);
 
   68         for (
int icol = 0; icol < n; icol++) {
 
   69           inv(icol,irow) -= factor * inv(icol,idiag);
 
   75     for (
int icol = 0; icol < n; icol++) {
 
   76       for (
int irow = 0; irow < n; irow++) {
 
   77         a(icol,irow) = inv(icol,irow);
 
   95   template <
unsigned int n, 
class real>
 
  111     bet  = 
static_cast<real
>(1)/c(0);
 
  116     bet = c(1) + b(1)*p(0);
 
  117     bet  = -
static_cast<real
>(1)/bet;
 
  118     p(1) = (d(1) + b(1)*q(0)) * bet;
 
  120     u(1) = (b(1)*u(0) - f(1)) * bet;
 
  123     for (
int i=2; i < n; i++) {
 
  124       bet = b(i) + a(i) * p(i-2);
 
  125       den = c(i) + a(i)*q(i-2) + bet*p(i-1);
 
  126       den = -
static_cast<real
>(1)/den;
 
  127       p(i) = (d(i) + bet*q(i-1)) * den;
 
  129       u(i) = (a(i)*u(i-2) + bet*u(i-1) - f(i)) * den;
 
  133     u(n-2) = u(n-2) + p(n-2) * u(n-1);
 
  134     for (
int i=n-3; i >= 0; i--) {
 
  135       u(i) = u(i) + p(i) * u(i+1) + q(i) * u(i+2);
 
  152   template <
unsigned int n, 
class real>
 
  161     SArray<real,1,n>   u1, u2, u3, u4, v1, v2, v3, v4, z1, z2, z3, z4, r, s, y;
 
  171     for (
int i=0; i < n; i++) {
 
  172       u1(i) = 0;   u2(i) = 0;   u3(i) = 0;   u4(i) = 0;
 
  173       v1(i) = 0;   v2(i) = 0;   v3(i) = 0;   v4(i) = 0;
 
  174       z1(i) = 0;   z2(i) = 0;   z3(i) = 0;   z4(i) = 0;
 
  195     p(0,0) = penta_sum(v1,z1);
 
  196     p(0,1) = penta_sum(v1,z2);
 
  197     p(0,2) = penta_sum(v1,z3);
 
  198     p(0,3) = penta_sum(v1,z4);
 
  200     p(1,0) = penta_sum(v2,z1);
 
  201     p(1,1) = penta_sum(v2,z2);
 
  202     p(1,2) = penta_sum(v2,z3);
 
  203     p(1,3) = penta_sum(v2,z4);
 
  205     p(2,0) = penta_sum(v3,z1);
 
  206     p(2,1) = penta_sum(v3,z2);
 
  207     p(2,2) = penta_sum(v3,z3);
 
  208     p(2,3) = penta_sum(v3,z4);
 
  210     p(3,0) = penta_sum(v4,z1);
 
  211     p(3,1) = penta_sum(v4,z2);
 
  212     p(3,2) = penta_sum(v4,z3);
 
  213     p(3,3) = penta_sum(v4,z4);
 
  215     for (
int i=0; i < 4; i++) {
 
  219     matrix_inverse_small(p);
 
  225     for (
int k=0; k < n; k++) {
 
  226       r(0) = r(0) + v1(k) * y(k);
 
  227       r(1) = r(1) + v2(k) * y(k);
 
  228       r(2) = r(2) + v3(k) * y(k);
 
  229       r(3) = r(3) + v4(k) * y(k);
 
  232     for (
int j=0; j < 4; j++) {
 
  234       for (
int k=0; k < 4; k++) {
 
  235         s(j) = s(j) + p(j,k) * r(k);
 
  239     for (
int j=0; j < n; j++) {
 
  240       real 
sum = z1(j)*s(0) + z2(j)*s(1) + z3(j)*s(2) + z4(j)*s(3);