BLAS++ 2024.05.31
BLAS C++ API
Loading...
Searching...
No Matches
ger.hh
1// Copyright (c) 2017-2023, University of Tennessee. All rights reserved.
2// SPDX-License-Identifier: BSD-3-Clause
3// This program is free software: you can redistribute it and/or modify it under
4// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
5
6#ifndef BLAS_GER_HH
7#define BLAS_GER_HH
8
9#include "blas/util.hh"
10
11#include <limits>
12
13namespace blas {
14
15// =============================================================================
58
59template <typename TA, typename TX, typename TY>
60void ger(
61 blas::Layout layout,
62 int64_t m, int64_t n,
63 blas::scalar_type<TA, TX, TY> alpha,
64 TX const *x, int64_t incx,
65 TY const *y, int64_t incy,
66 TA *A, int64_t lda )
67{
68 typedef blas::scalar_type<TA, TX, TY> scalar_t;
69
70 #define A(i_, j_) A[ (i_) + (j_)*lda ]
71
72 // constants
73 const scalar_t zero = 0;
74
75 // check arguments
76 blas_error_if( layout != Layout::ColMajor &&
77 layout != Layout::RowMajor );
78 blas_error_if( m < 0 );
79 blas_error_if( n < 0 );
80 blas_error_if( incx == 0 );
81 blas_error_if( incy == 0 );
82
83 if (layout == Layout::ColMajor)
84 blas_error_if( lda < m );
85 else
86 blas_error_if( lda < n );
87
88 // quick return
89 if (m == 0 || n == 0 || alpha == zero)
90 return;
91
92 if (layout == Layout::ColMajor) {
93 if (incx == 1 && incy == 1) {
94 // unit stride
95 for (int64_t j = 0; j < n; ++j) {
96 // note: NOT skipping if y[j] is zero, for consistent NAN handling
97 scalar_t tmp = alpha * conj( y[j] );
98 for (int64_t i = 0; i < m; ++i) {
99 A(i, j) += x[i] * tmp;
100 }
101 }
102 }
103 else if (incx == 1) {
104 // x unit stride, y non-unit stride
105 int64_t jy = (incy > 0 ? 0 : (-n + 1)*incy);
106 for (int64_t j = 0; j < n; ++j) {
107 scalar_t tmp = alpha * conj( y[jy] );
108 for (int64_t i = 0; i < m; ++i) {
109 A(i, j) += x[i] * tmp;
110 }
111 jy += incy;
112 }
113 }
114 else {
115 // x and y non-unit stride
116 int64_t kx = (incx > 0 ? 0 : (-m + 1)*incx);
117 int64_t jy = (incy > 0 ? 0 : (-n + 1)*incy);
118 for (int64_t j = 0; j < n; ++j) {
119 scalar_t tmp = alpha * conj( y[jy] );
120 int64_t ix = kx;
121 for (int64_t i = 0; i < m; ++i) {
122 A(i, j) += x[ix] * tmp;
123 ix += incx;
124 }
125 jy += incy;
126 }
127 }
128 }
129 else {
130 // RowMajor
131 if (incx == 1 && incy == 1) {
132 // unit stride
133 for (int64_t i = 0; i < m; ++i) {
134 // note: NOT skipping if x[i] is zero, for consistent NAN handling
135 scalar_t tmp = alpha * x[i];
136 for (int64_t j = 0; j < n; ++j) {
137 A(j, i) += tmp * conj( y[j] );
138 }
139 }
140 }
141 else if (incy == 1) {
142 // x non-unit stride, y unit stride
143 int64_t ix = (incx > 0 ? 0 : (-m + 1)*incx);
144 for (int64_t i = 0; i < m; ++i) {
145 scalar_t tmp = alpha * x[ix];
146 for (int64_t j = 0; j < n; ++j) {
147 A(j, i) += tmp * conj( y[j] );
148 }
149 ix += incx;
150 }
151 }
152 else {
153 // x and y non-unit stride
154 int64_t ky = (incy > 0 ? 0 : (-n + 1)*incy);
155 int64_t ix = (incx > 0 ? 0 : (-m + 1)*incx);
156 for (int64_t i = 0; i < m; ++i) {
157 scalar_t tmp = alpha * x[ix];
158 int64_t jy = ky;
159 for (int64_t j = 0; j < n; ++j) {
160 A(j, i) += tmp * conj( y[jy] );
161 jy += incy;
162 }
163 ix += incx;
164 }
165 }
166 }
167
168 #undef A
169}
170
171} // namespace blas
172
173#endif // #ifndef BLAS_GER_HH
void ger(blas::Layout layout, int64_t m, int64_t n, blas::scalar_type< TA, TX, TY > alpha, TX const *x, int64_t incx, TY const *y, int64_t incy, TA *A, int64_t lda)
General matrix rank-1 update:
Definition ger.hh:60