// Generic gemm_sse driver. Template parameters: the scalar type (real), the
// register type (regType; the overloads further down pass __m128d / __m128),
// the kernel dimensions (m_kernel, n_kernel, k_kernel) and the block counts
// (m_block, n_block).
// NOTE(review): this listing skips lines of the original file. The function
// signature, the definition of MM_outer, the condition guarding the k check
// and the closing brace are not visible here.
34 template<
typename real,
typename regType,
35 int m_kernel,
int n_kernel,
int k_kernel,
36 int m_block,
int n_block>
48 size_t const cp_size) {
// Dimension checks: m and n must match the kernel size times the block count
// exactly; otherwise a std::runtime_error is thrown.
53 if (m != m_kernel*m_block)
54 throw std::runtime_error(
"Error in gemm_sse(...): m != m_kernel*m_block");
55 if (n != n_kernel*n_block)
56 throw std::runtime_error(
"Error in gemm_sse(...): n != n_kernel*n_block");
// NOTE(review): the guard for this throw is missing from the listing;
// judging by the message it is presumably `if (k != k_kernel)` -- confirm.
58 throw std::runtime_error(
"Error in gemm_sse(...): k != k_kernel");
// Buffer checks: each caller-supplied packing buffer must hold at least
// size_packed elements of the corresponding MM_outer pack type.
59 if (ap_size < MM_outer::Pack_type_A::size_packed)
60 throw std::runtime_error(
"Error in gemm_sse(...): " 61 "ap_size < MM_outer::Pack_type_A::size_packed");
62 if (bp_size < MM_outer::Pack_type_B::size_packed)
63 throw std::runtime_error(
"Error in gemm_sse(...): " 64 "bp_size < MM_outer::Pack_type_B::size_packed");
65 if (cp_size < MM_outer::Pack_type_C::size_packed)
66 throw std::runtime_error(
"Error in gemm_sse(...): " 67 "cp_size < MM_outer::Pack_type_C::size_packed");
// Pack C, A and B (all with Ordering_col_wise) into the caller-supplied
// buffers, run the outer kernel on the packed data, then unpack the packed C
// back into C.
// NOTE(review): C is packed before exec and unpacked after it, so exec
// presumably accumulates into the existing contents of C -- confirm against
// MM_outer::exec.
68 MM_outer::Pack_type_C::template pack<Ordering_col_wise>( C, C_packed, m, n);
69 MM_outer::Pack_type_A::template pack<Ordering_col_wise>(
A, A_packed, m, k);
70 MM_outer::Pack_type_B::template pack<Ordering_col_wise>(
B, B_packed, k, n);
// exec receives the addresses of the A_packed and B_packed pointers, but
// C_packed itself.
71 MM_outer::exec(&A_packed, &B_packed, C_packed);
72 MM_outer::Pack_type_C::template unpack<Ordering_col_wise>(C, C_packed, m, n);
// gemm_sse variant templated only on the scalar type. The visible body does
// nothing but throw std::runtime_error.
// NOTE(review): presumably this is what gets selected when `real` is neither
// double nor float (see the overloads that follow) -- confirm. The signature
// and closing brace are missing from this listing.
75 template<
typename real>
87 size_t const cp_size) {
88 throw std::runtime_error(
"gemm_sse not implemented for chosen real type.");
// Double-precision overload (only fragments of its signature are visible in
// this listing). It forwards to the generic template with regType = __m128d,
// m_kernel = 4, n_kernel = 4, k_kernel = 32, m_block = 8, n_block = 8.
// With those values the generic template's checks require m == 32 and
// n == 32; its k error message refers to k_kernel, which is 32 here.
93 double const *
const B,
101 size_t const ap_size,
102 size_t const bp_size,
103 size_t const cp_size) {
104 gemm_sse<double, __m128d, 4, 4, 32, 8, 8>
106 A_packed, B_packed, C_packed, ap_size, bp_size, cp_size);
// Single-precision overload (only fragments of its signature are visible in
// this listing). It forwards to the generic template with regType = __m128,
// m_kernel = 8, n_kernel = 4, k_kernel = 32, m_block = 4, n_block = 8.
// With those values the generic template's checks require m == 32 and
// n == 32; its k error message refers to k_kernel, which is 32 here.
111 float const *
const B,
119 size_t const ap_size,
120 size_t const bp_size,
121 size_t const cp_size) {
122 gemm_sse<float, __m128, 8, 4, 32, 4, 8>
124 A_packed, B_packed, C_packed, ap_size, bp_size, cp_size);
Matrix multiplication template for architectures with SSE2 or higher and compilers that support C++ intrinsics for access to SSE instructions.
Definition: mm_kernel_inner_sse2_A.h:61
ergo_real real
Definition: cubature_rules.h:33
Template for matrix-matrix multiplication that wraps around a kernel given as a template argument.
Definition: mm_kernel_outer_A.h:45
static void gemm_sse(real const *const A, real const *const B, real *C, size_t const m, size_t const n, size_t const k, real *A_packed, real *B_packed, real *C_packed, size_t const ap_size, size_t const bp_size, size_t const cp_size)
Definition: gemm_sse.h:37