89template <
typename TA,
typename TB,
typename TC>
95 scalar_type<TA, TB, TC> alpha,
96 TA
const *A, int64_t lda,
97 TB
const *B, int64_t ldb,
98 scalar_type<TA, TB, TC> beta,
101 typedef blas::scalar_type<TA, TB, TC> scalar_t;
103 #define A(i_, j_) A[ (i_) + (j_)*lda ]
104 #define B(i_, j_) B[ (i_) + (j_)*ldb ]
105 #define C(i_, j_) C[ (i_) + (j_)*ldc ]
108 const scalar_t zero = 0;
109 const scalar_t one = 1;
112 blas_error_if( layout != Layout::ColMajor &&
113 layout != Layout::RowMajor );
114 blas_error_if( uplo != Uplo::Lower &&
115 uplo != Uplo::Upper &&
116 uplo != Uplo::General );
117 blas_error_if( n < 0 );
118 blas_error_if( k < 0 );
121 if (trans == Op::ConjTrans) {
125 "trans == Op::ConjTrans && "
126 "( blas::is_complex<TA>::value ||"
127 " blas::is_complex<TB>::value )" );
131 blas_error_if( trans != Op::NoTrans &&
132 trans != Op::Trans );
136 if (layout == Layout::RowMajor) {
137 if (uplo == Uplo::Lower)
139 else if (uplo == Uplo::Upper)
141 trans = (trans == Op::NoTrans)
147 blas_error_if( lda < ((trans == Op::NoTrans) ? n : k) );
148 blas_error_if( ldb < ((trans == Op::NoTrans) ? n : k) );
149 blas_error_if( ldc < n );
152 if (n == 0 || k == 0)
158 if (uplo != Uplo::Upper) {
159 for (int64_t j = 0; j < n; ++j) {
160 for (int64_t i = 0; i <= j; ++i)
164 else if (uplo != Uplo::Lower) {
165 for (int64_t j = 0; j < n; ++j) {
166 for (int64_t i = j; i < n; ++i)
171 for (int64_t j = 0; j < n; ++j) {
172 for (int64_t i = 0; i < n; ++i)
177 else if (beta != one) {
178 if (uplo != Uplo::Upper) {
179 for (int64_t j = 0; j < n; ++j) {
180 for (int64_t i = 0; i <= j; ++i)
184 else if (uplo != Uplo::Lower) {
185 for (int64_t j = 0; j < n; ++j) {
186 for (int64_t i = j; i < n; ++i)
191 for (int64_t j = 0; j < n; ++j) {
192 for (int64_t i = 0; i < n; ++i)
201 if (trans == Op::NoTrans) {
202 if (uplo != Uplo::Lower) {
204 for (int64_t j = 0; j < n; ++j) {
206 for (int64_t i = 0; i <= j; ++i)
209 for (int64_t l = 0; l < k; ++l) {
210 scalar_t alpha_Bjl = alpha*B(j, l);
211 scalar_t alpha_Ajl = alpha*A(j, l);
212 for (int64_t i = 0; i <= j; ++i)
213 C(i, j) += A(i, l)*alpha_Bjl + B(i, l)*alpha_Ajl;
218 for (int64_t j = 0; j < n; ++j) {
220 for (int64_t i = j; i < n; ++i)
223 for (int64_t l = 0; l < k; ++l) {
224 scalar_t alpha_Bjl = alpha*B(j, l);
225 scalar_t alpha_Ajl = alpha*A(j, l);
226 for (int64_t i = j; i < n; ++i)
227 C(i, j) += A(i, l)*alpha_Bjl + B(i, l)*alpha_Ajl;
233 if (uplo != Uplo::Lower) {
235 for (int64_t j = 0; j < n; ++j) {
236 for (int64_t i = 0; i <= j; ++i) {
237 scalar_t sum1 = zero;
238 scalar_t sum2 = zero;
239 for (int64_t l = 0; l < k; ++l) {
240 sum1 += A(l, i) * B(l, j);
241 sum2 += B(l, i) * A(l, j);
243 C(i, j) = alpha*sum1 + alpha*sum2 + beta*C(i, j);
248 for (int64_t j = 0; j < n; ++j) {
249 for (int64_t i = j; i < n; ++i) {
250 scalar_t sum1 = zero;
251 scalar_t sum2 = zero;
252 for (int64_t l = 0; l < k; ++l) {
253 sum1 += A(l, i) * B(l, j);
254 sum2 += B(l, i) * A(l, j);
256 C(i, j) = alpha*sum1 + alpha*sum2 + beta*C(i, j);
262 if (uplo == Uplo::General) {
263 for (int64_t j = 0; j < n; ++j) {
264 for (int64_t i = j+1; i < n; ++i)
void syr2k(blas::Layout layout, blas::Uplo uplo, blas::Op trans, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float const *B, int64_t ldb, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_syr2k.cc:107
True if T is std::complex<T2> for some type T2.
Definition util.hh:349