BLAS++ 2024.05.31
BLAS C++ API
Loading...
Searching...
No Matches
geru.hh
1// Copyright (c) 2017-2023, University of Tennessee. All rights reserved.
2// SPDX-License-Identifier: BSD-3-Clause
3// This program is free software: you can redistribute it and/or modify it under
4// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
5
6#ifndef BLAS_GERU_HH
7#define BLAS_GERU_HH
8
9#include "blas/util.hh"
10#include "blas/ger.hh"
11
12#include <limits>
13
14namespace blas {
15
16// =============================================================================
59
60template <typename TA, typename TX, typename TY>
61void geru(
62 blas::Layout layout,
63 int64_t m, int64_t n,
64 blas::scalar_type<TA, TX, TY> alpha,
65 TX const *x, int64_t incx,
66 TY const *y, int64_t incy,
67 TA *A, int64_t lda )
68{
69 typedef blas::scalar_type<TA, TX, TY> scalar_t;
70
71 #define A(i_, j_) A[ (i_) + (j_)*lda ]
72
73 // constants
74 const scalar_t zero = 0;
75
76 // check arguments
77 blas_error_if( layout != Layout::ColMajor &&
78 layout != Layout::RowMajor );
79 blas_error_if( m < 0 );
80 blas_error_if( n < 0 );
81 blas_error_if( incx == 0 );
82 blas_error_if( incy == 0 );
83
84 if (layout == Layout::ColMajor)
85 blas_error_if( lda < m );
86 else
87 blas_error_if( lda < n );
88
89 // quick return
90 if (m == 0 || n == 0 || alpha == zero)
91 return;
92
93 // for row-major, simply swap dimensions and x <=> y
94 // this doesn't work in the complex gerc case because y gets conj
95 if (layout == Layout::RowMajor) {
96 geru( Layout::ColMajor, n, m, alpha, y, incy, x, incx, A, lda );
97 return;
98 }
99
100 if (incx == 1 && incy == 1) {
101 // unit stride
102 for (int64_t j = 0; j < n; ++j) {
103 // note: NOT skipping if y[j] is zero, for consistent NAN handling
104 scalar_t tmp = alpha * y[j];
105 for (int64_t i = 0; i < m; ++i) {
106 A(i, j) += x[i] * tmp;
107 }
108 }
109 }
110 else if (incx == 1) {
111 // x unit stride, y non-unit stride
112 int64_t jy = (incy > 0 ? 0 : (-n + 1)*incy);
113 for (int64_t j = 0; j < n; ++j) {
114 scalar_t tmp = alpha * y[jy];
115 for (int64_t i = 0; i < m; ++i) {
116 A(i, j) += x[i] * tmp;
117 }
118 jy += incy;
119 }
120 }
121 else {
122 // x and y non-unit stride
123 int64_t kx = (incx > 0 ? 0 : (-m + 1)*incx);
124 int64_t jy = (incy > 0 ? 0 : (-n + 1)*incy);
125 for (int64_t j = 0; j < n; ++j) {
126 scalar_t tmp = alpha * y[jy];
127 int64_t ix = kx;
128 for (int64_t i = 0; i < m; ++i) {
129 A(i, j) += x[ix] * tmp;
130 ix += incx;
131 }
132 jy += incy;
133 }
134 }
135
136 #undef A
137}
138
139} // namespace blas
140
141#endif // #ifndef BLAS_GER_HH
void geru(blas::Layout layout, int64_t m, int64_t n, blas::scalar_type< TA, TX, TY > alpha, TX const *x, int64_t incx, TY const *y, int64_t incy, TA *A, int64_t lda)
General matrix rank-1 update:
Definition geru.hh:61