BLAS++ 2024.05.31
BLAS C++ API
Loading...
Searching...
No Matches
her2.hh
1// Copyright (c) 2017-2023, University of Tennessee. All rights reserved.
2// SPDX-License-Identifier: BSD-3-Clause
3// This program is free software: you can redistribute it and/or modify it under
4// the terms of the BSD 3-Clause license. See the accompanying LICENSE file.
5
6#ifndef BLAS_HER2_HH
7#define BLAS_HER2_HH
8
9#include "blas/util.hh"
10#include "blas/syr2.hh"
11
12#include <limits>
13
14namespace blas {
15
16// =============================================================================
64
65template <typename TA, typename TX, typename TY>
66void her2(
67 blas::Layout layout,
68 blas::Uplo uplo,
69 int64_t n,
70 blas::scalar_type<TA, TX, TY> alpha,
71 TX const *x, int64_t incx,
72 TY const *y, int64_t incy,
73 TA *A, int64_t lda )
74{
75 typedef blas::scalar_type<TA, TX, TY> scalar_t;
76
77 #define A(i_, j_) A[ (i_) + (j_)*lda ]
78
79 // constants
80 const scalar_t zero = 0;
81
82 // check arguments
83 blas_error_if( layout != Layout::ColMajor &&
84 layout != Layout::RowMajor );
85 blas_error_if( uplo != Uplo::Lower &&
86 uplo != Uplo::Upper );
87 blas_error_if( n < 0 );
88 blas_error_if( incx == 0 );
89 blas_error_if( incy == 0 );
90 blas_error_if( lda < n );
91
92 // quick return
93 if (n == 0 || alpha == zero)
94 return;
95
96 // for row major, swap lower <=> upper
97 if (layout == Layout::RowMajor) {
98 uplo = (uplo == Uplo::Lower ? Uplo::Upper : Uplo::Lower);
99 }
100
101 int64_t kx = (incx > 0 ? 0 : (-n + 1)*incx);
102 int64_t ky = (incy > 0 ? 0 : (-n + 1)*incy);
103 if (uplo == Uplo::Upper) {
104 if (incx == 1 && incy == 1) {
105 // unit stride
106 for (int64_t j = 0; j < n; ++j) {
107 // note: NOT skipping if x[j] or y[j] is zero, for consistent NAN handling
108 scalar_t tmp1 = alpha * conj( y[j] );
109 scalar_t tmp2 = conj( alpha * x[j] );
110 for (int64_t i = 0; i <= j-1; ++i) {
111 A(i, j) += x[i]*tmp1 + y[i]*tmp2;
112 }
113 A(j, j) = real( A(j, j) ) + real( x[j]*tmp1 + y[j]*tmp2 );
114 }
115 }
116 else {
117 // non-unit stride
118 int64_t jx = kx;
119 int64_t jy = ky;
120 for (int64_t j = 0; j < n; ++j) {
121 scalar_t tmp1 = alpha * conj( y[jy] );
122 scalar_t tmp2 = conj( alpha * x[jx] );
123 int64_t ix = kx;
124 int64_t iy = ky;
125 for (int64_t i = 0; i <= j-1; ++i) {
126 A(i, j) += x[ix]*tmp1 + y[iy]*tmp2;
127 ix += incx;
128 iy += incy;
129 }
130 A(j, j) = real( A(j, j) ) + real( x[jx]*tmp1 + y[jy]*tmp2 );
131 jx += incx;
132 jy += incy;
133 }
134 }
135 }
136 else {
137 // lower triangle
138 if (incx == 1 && incy == 1) {
139 // unit stride
140 for (int64_t j = 0; j < n; ++j) {
141 scalar_t tmp1 = alpha * conj( y[j] );
142 scalar_t tmp2 = conj( alpha * x[j] );
143 A(j, j) = real( A(j, j) ) + real( x[j]*tmp1 + y[j]*tmp2 );
144 for (int64_t i = j+1; i < n; ++i) {
145 A(i, j) += x[i]*tmp1 + y[i]*tmp2;
146 }
147 }
148 }
149 else {
150 // non-unit stride
151 int64_t jx = kx;
152 int64_t jy = ky;
153 for (int64_t j = 0; j < n; ++j) {
154 scalar_t tmp1 = alpha * conj( y[jy] );
155 scalar_t tmp2 = conj( alpha * x[jx] );
156 A(j, j) = real( A(j, j) ) + real( x[jx]*tmp1 + y[jy]*tmp2 );
157 int64_t ix = jx;
158 int64_t iy = jy;
159 for (int64_t i = j+1; i < n; ++i) {
160 ix += incx;
161 iy += incy;
162 A(i, j) += x[ix]*tmp1 + y[iy]*tmp2;
163 }
164 jx += incx;
165 jy += incy;
166 }
167 }
168 }
169
170 #undef A
171}
172
173} // namespace blas
174
175#endif // #ifndef BLAS_HER2_HH
void her2(blas::Layout layout, blas::Uplo uplo, int64_t n, blas::scalar_type< TA, TX, TY > alpha, TX const *x, int64_t incx, TY const *y, int64_t incy, TA *A, int64_t lda)
Hermitian matrix rank-2 update:
Definition her2.hh:66