78template <
typename TA,
typename TC>
84 scalar_type<TA, TC> alpha,
85 TA
const *A, int64_t lda,
86 scalar_type<TA, TC> beta,
89 typedef blas::scalar_type<TA, TC> scalar_t;
91 #define A(i_, j_) A[ (i_) + (j_)*lda ]
92 #define C(i_, j_) C[ (i_) + (j_)*ldc ]
95 const scalar_t zero = 0;
96 const scalar_t one = 1;
99 blas_error_if( layout != Layout::ColMajor &&
100 layout != Layout::RowMajor );
101 blas_error_if( uplo != Uplo::Lower &&
102 uplo != Uplo::Upper &&
103 uplo != Uplo::General );
104 blas_error_if( n < 0 );
105 blas_error_if( k < 0 );
108 if (trans == Op::ConjTrans) {
111 "trans == Op::ConjTrans && "
112 "blas::is_complex<TA>::value" );
116 blas_error_if( trans != Op::NoTrans &&
117 trans != Op::Trans );
121 if (layout == Layout::RowMajor) {
122 if (uplo == Uplo::Lower)
124 else if (uplo == Uplo::Upper)
126 trans = (trans == Op::NoTrans)
132 blas_error_if( lda < ((trans == Op::NoTrans) ? n : k) );
133 blas_error_if( ldc < n );
136 if (n == 0 || k == 0)
142 if (uplo != Uplo::Upper) {
143 for (int64_t j = 0; j < n; ++j) {
144 for (int64_t i = 0; i <= j; ++i)
148 else if (uplo != Uplo::Lower) {
149 for (int64_t j = 0; j < n; ++j) {
150 for (int64_t i = j; i < n; ++i)
155 for (int64_t j = 0; j < n; ++j) {
156 for (int64_t i = 0; i < n; ++i)
161 else if (beta != one) {
162 if (uplo != Uplo::Upper) {
163 for (int64_t j = 0; j < n; ++j) {
164 for (int64_t i = 0; i <= j; ++i)
168 else if (uplo != Uplo::Lower) {
169 for (int64_t j = 0; j < n; ++j) {
170 for (int64_t i = j; i < n; ++i)
175 for (int64_t j = 0; j < n; ++j) {
176 for (int64_t i = 0; i < n; ++i)
185 if (trans == Op::NoTrans) {
186 if (uplo != Uplo::Lower) {
188 for (int64_t j = 0; j < n; ++j) {
190 for (int64_t i = 0; i <= j; ++i)
193 for (int64_t l = 0; l < k; ++l) {
194 scalar_t alpha_Ajl = alpha*A(j, l);
195 for (int64_t i = 0; i <= j; ++i)
196 C(i, j) += A(i, l)*alpha_Ajl;
201 for (int64_t j = 0; j < n; ++j) {
203 for (int64_t i = j; i < n; ++i)
206 for (int64_t l = 0; l < k; ++l) {
207 scalar_t alpha_Ajl = alpha*A(j, l);
208 for (int64_t i = j; i < n; ++i)
209 C(i, j) += A(i, l)*alpha_Ajl;
215 if (uplo != Uplo::Lower) {
217 for (int64_t j = 0; j < n; ++j) {
218 for (int64_t i = 0; i <= j; ++i) {
220 for (int64_t l = 0; l < k; ++l)
221 sum += A(l, i) * A(l, j);
222 C(i, j) = alpha*sum + beta*C(i, j);
227 for (int64_t j = 0; j < n; ++j) {
228 for (int64_t i = j; i < n; ++i) {
230 for (int64_t l = 0; l < k; ++l) {
231 sum += A(l, i) * A(l, j);
233 C(i, j) = alpha*sum + beta*C(i, j);
239 if (uplo == Uplo::General) {
240 for (int64_t j = 0; j < n; ++j) {
241 for (int64_t i = j+1; i < n; ++i)
void syrk(blas::Layout layout, blas::Uplo uplo, blas::Op trans, int64_t n, int64_t k, float alpha, float const *A, int64_t lda, float beta, float *C, int64_t ldc, blas::Queue &queue)
GPU device, float version.
Definition device_syrk.cc:101
True if T is std::complex<T2> for some type T2.
Definition util.hh:349