68template <
typename TA,
typename TX>
75 TA
const *A, int64_t lda,
78 #define A(i_, j_) A[ (i_) + (j_)*lda ]
81 blas_error_if( layout != Layout::ColMajor &&
82 layout != Layout::RowMajor );
83 blas_error_if( uplo != Uplo::Lower &&
84 uplo != Uplo::Upper );
85 blas_error_if( trans != Op::NoTrans &&
87 trans != Op::ConjTrans );
88 blas_error_if( diag != Diag::NonUnit &&
90 blas_error_if( n < 0 );
91 blas_error_if( lda < n );
92 blas_error_if( incx == 0 );
101 if (layout == Layout::RowMajor) {
102 uplo = (uplo == Uplo::Lower ? Uplo::Upper : Uplo::Lower);
103 if (trans == Op::NoTrans) {
107 if (trans == Op::ConjTrans) {
114 bool nonunit = (diag == Diag::NonUnit);
115 int64_t kx = (incx > 0 ? 0 : (-n + 1)*incx);
117 if (trans == Op::NoTrans && ! doconj) {
119 if (uplo == Uplo::Upper) {
123 for (int64_t j = 0; j < n; ++j) {
126 for (int64_t i = 0; i <= j-1; ++i) {
127 x[i] += tmp * A(i, j);
137 for (int64_t j = 0; j < n; ++j) {
141 for (int64_t i = 0; i <= j-1; ++i) {
142 x[ix] += tmp * A(i, j);
156 for (int64_t j = n-1; j >= 0; --j) {
159 for (int64_t i = n-1; i >= j+1; --i) {
160 x[i] += tmp * A(i, j);
171 for (int64_t j = n-1; j >= 0; --j) {
175 for (int64_t i = n-1; i >= j+1; --i) {
176 x[ix] += tmp * A(i, j);
187 else if (trans == Op::NoTrans && doconj) {
189 if (uplo == Uplo::Upper) {
193 for (int64_t j = 0; j < n; ++j) {
196 for (int64_t i = 0; i <= j-1; ++i) {
197 x[i] += tmp * conj( A(i, j) );
200 x[j] *= conj( A(j, j) );
207 for (int64_t j = 0; j < n; ++j) {
211 for (int64_t i = 0; i <= j-1; ++i) {
212 x[ix] += tmp * conj( A(i, j) );
216 x[jx] *= conj( A(j, j) );
226 for (int64_t j = n-1; j >= 0; --j) {
229 for (int64_t i = n-1; i >= j+1; --i) {
230 x[i] += tmp * conj( A(i, j) );
233 x[j] *= conj( A(j, j) );
241 for (int64_t j = n-1; j >= 0; --j) {
245 for (int64_t i = n-1; i >= j+1; --i) {
246 x[ix] += tmp * conj( A(i, j) );
250 x[jx] *= conj( A(j, j) );
257 else if (trans == Op::Trans) {
259 if (uplo == Uplo::Upper) {
263 for (int64_t j = n-1; j >= 0; --j) {
268 for (int64_t i = j - 1; i >= 0; --i) {
269 tmp += A(i, j) * x[i];
276 int64_t jx = kx + (n - 1)*incx;
277 for (int64_t j = n-1; j >= 0; --j) {
283 for (int64_t i = j - 1; i >= 0; --i) {
285 tmp += A(i, j) * x[ix];
296 for (int64_t j = 0; j < n; ++j) {
301 for (int64_t i = j + 1; i < n; ++i) {
302 tmp += A(i, j) * x[i];
310 for (int64_t j = 0; j < n; ++j) {
316 for (int64_t i = j + 1; i < n; ++i) {
318 tmp += A(i, j) * x[ix];
329 if (uplo == Uplo::Upper) {
333 for (int64_t j = n-1; j >= 0; --j) {
336 tmp *= conj( A(j, j) );
338 for (int64_t i = j - 1; i >= 0; --i) {
339 tmp += conj( A(i, j) ) * x[i];
346 int64_t jx = kx + (n - 1)*incx;
347 for (int64_t j = n-1; j >= 0; --j) {
351 tmp *= conj( A(j, j) );
353 for (int64_t i = j - 1; i >= 0; --i) {
355 tmp += conj( A(i, j) ) * x[ix];
366 for (int64_t j = 0; j < n; ++j) {
369 tmp *= conj( A(j, j) );
371 for (int64_t i = j + 1; i < n; ++i) {
372 tmp += conj( A(i, j) ) * x[i];
380 for (int64_t j = 0; j < n; ++j) {
384 tmp *= conj( A(j, j) );
386 for (int64_t i = j + 1; i < n; ++i) {
388 tmp += conj( A(i, j) ) * x[ix];
void trmv(blas::Layout layout, blas::Uplo uplo, blas::Op trans, blas::Diag diag, int64_t n, TA const *A, int64_t lda, TX *x, int64_t incx)
Triangular matrix-vector multiply:
Definition trmv.hh:69