YAKL
YAKL_intrinsics_matmul.h
Go to the documentation of this file.
1 
2 #pragma once
3 // Included by YAKL_intrinsics.h
4 
6 namespace yakl {
7  namespace intrinsics {
8 
10  // Matrix multiplication routines for column-row format
12  template <class T, index_t COL_L, index_t ROW_L, index_t COL_R>
13  YAKL_INLINE SArray<T,2,COL_R,ROW_L>
15  SArray<T,2,COL_R,COL_L> const &right ) {
17  for (index_t i=0; i < COL_R; i++) {
18  for (index_t j=0; j < ROW_L; j++) {
19  T tmp = 0;
20  for (index_t k=0; k < COL_L; k++) {
21  tmp += left(k,j) * right(i,k);
22  }
23  ret(i,j) = tmp;
24  }
25  }
26  return ret;
27  }
28 
29  template<class T, index_t COL_L, index_t ROW_L>
32  SArray<T,1,COL_L> const &right ) {
34  for (index_t j=0; j < ROW_L; j++) {
35  T tmp = 0;
36  for (index_t k=0; k < COL_L; k++) {
37  tmp += left(k,j) * right(k);
38  }
39  ret(j) = tmp;
40  }
41  return ret;
42  }
43 
44  template <class T, int COL_L, int ROW_L, int COL_R>
46  matmul_cr ( FSArray<T,2,SB<COL_L>,SB<ROW_L>> const &left ,
47  FSArray<T,2,SB<COL_R>,SB<COL_L>> const &right ) {
49  for (index_t i=1; i <= COL_R; i++) {
50  for (index_t j=1; j <= ROW_L; j++) {
51  T tmp = 0;
52  for (index_t k=1; k <= COL_L; k++) {
53  tmp += left(k,j) * right(i,k);
54  }
55  ret(i,j) = tmp;
56  }
57  }
58  return ret;
59  }
60 
61  template<class T, int COL_L, int ROW_L>
63  matmul_cr ( FSArray<T,2,SB<COL_L>,SB<ROW_L>> const &left ,
64  FSArray<T,1,SB<COL_L>> const &right ) {
66  for (index_t j=1; j <= ROW_L; j++) {
67  T tmp = 0;
68  for (index_t k=1; k <= COL_L; k++) {
69  tmp += left(k,j) * right(k);
70  }
71  ret(j) = tmp;
72  }
73  return ret;
74  }
75 
76 
78  // Matrix multiplication routines for row-column format
80  template <class T, index_t COL_L, index_t ROW_L, index_t COL_R>
83  SArray<T,2,COL_L,COL_R> const &right ) {
85  for (index_t i=0; i < COL_R; i++) {
86  for (index_t j=0; j < ROW_L; j++) {
87  T tmp = 0;
88  for (index_t k=0; k < COL_L; k++) {
89  tmp += left(j,k) * right(k,i);
90  }
91  ret(j,i) = tmp;
92  }
93  }
94  return ret;
95  }
96 
97  template<class T, index_t COL_L, index_t ROW_L>
100  SArray<T,1,COL_L> const &right ) {
101  SArray<T,1,ROW_L> ret;
102  for (index_t j=0; j < ROW_L; j++) {
103  T tmp = 0;
104  for (index_t k=0; k < COL_L; k++) {
105  tmp += left(j,k) * right(k);
106  }
107  ret(j) = tmp;
108  }
109  return ret;
110  }
111 
112  template <class T, int COL_L, int ROW_L, int COL_R>
114  matmul_rc ( FSArray<T,2,SB<ROW_L>,SB<COL_L>> const &left ,
115  FSArray<T,2,SB<COL_L>,SB<COL_R>> const &right ) {
117  for (index_t i=1; i <= COL_R; i++) {
118  for (index_t j=1; j <= ROW_L; j++) {
119  T tmp = 0;
120  for (index_t k=1; k <= COL_L; k++) {
121  tmp += left(j,k) * right(k,i);
122  }
123  ret(j,i) = tmp;
124  }
125  }
126  return ret;
127  }
128 
129  template<class T, int COL_L, int ROW_L>
131  matmul_rc ( FSArray<T,2,SB<ROW_L>,SB<COL_L>> const &left ,
132  FSArray<T,1,SB<COL_L>> const &right ) {
134  for (index_t j=1; j <= ROW_L; j++) {
135  T tmp = 0;
136  for (index_t k=1; k <= COL_L; k++) {
137  tmp += left(j,k) * right(k);
138  }
139  ret(j) = tmp;
140  }
141  return ret;
142  }
143 
144  }
145 }
__YAKL_NAMESPACE_WRAPPER_END__
#define __YAKL_NAMESPACE_WRAPPER_END__
Definition: YAKL.h:20
yakl::SB
This specifies a set of bounds for a dimension when declaring a yakl::FSArray.
Definition: YAKL_FSArray.h:18
__YAKL_NAMESPACE_WRAPPER_BEGIN__
#define __YAKL_NAMESPACE_WRAPPER_BEGIN__
Definition: YAKL.h:19
YAKL_INLINE
#define YAKL_INLINE
Used to decorate functions called from kernels (parallel_for and parallel_outer) or from CPU function...
Definition: YAKL_defines.h:140
yakl::intrinsics::matmul_cr
YAKL_INLINE SArray< T, 2, COL_R, ROW_L > matmul_cr(SArray< T, 2, COL_L, ROW_L > const &left, SArray< T, 2, COL_R, COL_L > const &right)
Definition: YAKL_intrinsics_matmul.h:14
yakl::index_t
unsigned int index_t
Definition: YAKL.h:41
yakl::CSArray
C-style array on the stack similar in nature to, e.g., float arr[ny][nx];
Definition: YAKL_CSArray.h:30
yakl
yakl::intrinsics::matmul_rc
YAKL_INLINE SArray< T, 2, ROW_L, COL_R > matmul_rc(SArray< T, 2, ROW_L, COL_L > const &left, SArray< T, 2, COL_L, COL_R > const &right)
Definition: YAKL_intrinsics_matmul.h:82
yakl::FSArray
Fortran-style array on the stack similar in nature to, e.g., float arr[ny][nx];
Definition: YAKL_FSArray.h:53