Functions
template<typename scalar_t >
void	slate::hb2st (HermitianBandMatrix< scalar_t > &A, Matrix< scalar_t > &V, Options const &opts)
	Reduces a band Hermitian matrix to a bidiagonal matrix using bulge chasing.

template<typename scalar_t >
void	slate::he2hb (HermitianMatrix< scalar_t > &A, TriangularFactors< scalar_t > &T, Options const &opts)
	Distributed parallel reduction to band for 3-stage SVD.

template<typename scalar_t >
void	slate::internal::herf (int64_t n, scalar_t *v, HermitianMatrix< scalar_t > &A)
	Applies a Householder reflector \(H = I - \tau v v^H\) to the Hermitian matrix \(A\) on the left and right.

template<Target target, typename scalar_t >
void	slate::internal::hebr1 (int64_t n, scalar_t *v, HermitianMatrix< scalar_t > &&A, int priority)
	Implements task type 1 in the tridiagonal bulge chasing algorithm.

template<Target target, typename scalar_t >
void	slate::internal::hebr2 (int64_t n1, scalar_t v1, int64_t n2, scalar_t v2, Matrix< scalar_t > &&A, int priority)
	Implements task type 2 in the tridiagonal bulge chasing algorithm.

template<Target target, typename scalar_t >
void	slate::internal::hebr3 (int64_t n, scalar_t *v, HermitianMatrix< scalar_t > &&A, int priority)
	Implements task type 3 in the tridiagonal bulge chasing algorithm.

template<typename scalar_t >
void	slate::internal::hebr3 (internal::TargetType< Target::HostTask >, int64_t n, scalar_t *v, HermitianMatrix< scalar_t > &A, int priority)
	Implements task type 3 in the tridiagonal bulge chasing algorithm, updating a diagonal block with a 2-sided Householder transformation.

template<typename real_t >
void	slate::stedc (std::vector< real_t > &D, std::vector< real_t > &E, Matrix< real_t > &Q, Options const &opts)
	Computes all eigenvalues and eigenvectors of a real symmetric tridiagonal matrix in parallel, using the divide and conquer algorithm.

template<typename real_t >
void	slate::stedc_deflate (int64_t n, int64_t n1, real_t &rho, real_t D, real_t Dsecular, real_t z, real_t zsecular, Matrix< real_t > &Q, Matrix< real_t > &Qtype, int64_t *itype, int64_t &nsecular, int64_t &Qtype12_begin, int64_t &Qtype12_end, int64_t &Qtype23_begin, int64_t &Qtype23_end, Options const &opts)
	Sorts the two sets of eigenvalues together into a single sorted set, then deflates eigenvalues, which both resolves stability issues (division by zero) and reduces the size of the problem.

template<typename real_t >
void	slate::stedc_merge (int64_t n, int64_t n1, real_t rho, real_t *D, Matrix< real_t > &Q, Matrix< real_t > &Qtype, Matrix< real_t > &U, Options const &opts)
	Computes the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix, in parallel.

template<typename real_t >
void	slate::stedc_secular (int64_t nsecular, int64_t n, real_t rho, real_t D, real_t z, real_t Lambda, Matrix< real_t > &U, int64_t itype, Options const &opts)
	Finds the nsecular roots of the secular equation, as defined by the values in rho, D, z, and computes the corresponding eigenvectors in U.

template<typename real_t >
void	slate::stedc_solve (std::vector< real_t > &D, std::vector< real_t > &E, Matrix< real_t > &Q, Matrix< real_t > &W, Matrix< real_t > &U, Options const &opts)
	Computes all eigenvalues and eigenvectors of a symmetric tridiagonal matrix in parallel, using the divide and conquer algorithm.

template<typename real_t >
void	slate::stedc_z_vector (Matrix< real_t > &Q, std::vector< real_t > &z, Options const &opts)
	Communicates the z vector to all ranks.

template<typename scalar_t >
void	slate::steqr2 (Job jobz, std::vector< blas::real_type< scalar_t > > &D, std::vector< blas::real_type< scalar_t > > &E, Matrix< scalar_t > &Z, Options const &opts)
	Computes all eigenvalues/eigenvectors of a symmetric tridiagonal matrix using the Pal-Walker-Kahan variant of the QL or QR algorithm.

template<typename scalar_t >
void	slate::sterf (std::vector< scalar_t > &D, std::vector< scalar_t > &E, Options const &opts)
	Computes all eigenvalues of a symmetric tridiagonal matrix using the Pal-Walker-Kahan variant of the QL or QR algorithm.

template<typename scalar_t >
void	slate::unmtr_hb2st (Side side, Op op, Matrix< scalar_t > &V, Matrix< scalar_t > &C, const std::map< Option, Value > &opts)
	Multiplies the general m-by-n matrix C by Q from `slate::hb2st` as follows:

template<typename scalar_t >
void	slate::unmtr_he2hb (Side side, Op op, HermitianMatrix< scalar_t > &A, TriangularFactors< scalar_t > T, Matrix< scalar_t > &C, Options const &opts)
	Multiplies the general m-by-n matrix C by Q from `slate::he2hb` as follows:

Detailed Description

Function Documentation

◆ hb2st()

template<typename scalar_t >

void slate::hb2st	(	HermitianBandMatrix< scalar_t > &	A,
		Matrix< scalar_t > &	V,
		Options const &	opts
	)

Reduces a band Hermitian matrix to a bidiagonal matrix using bulge chasing.

Template Parameters

scalar_t One of float, double, std::complex<float>, std::complex<double>.

Parameters

[in,out]	A	The band Hermitian matrix A.
[out]	V	Matrix of Householder reflectors produced in the process. Dimension 2*band-by-XYZ todo
[in]	opts	Additional options, as map of name = value pairs. Possible options: Option::Target: Implementation to target. Possible values: HostTask: OpenMP tasks on CPU host [default]. HostNest: nested OpenMP parallel for loop on CPU host. HostBatch: batched BLAS on CPU host. Devices: batched BLAS on GPU device.

◆ he2hb()

template<typename scalar_t >

void slate::he2hb	(	HermitianMatrix< scalar_t > &	A,
		TriangularFactors< scalar_t > &	T,
		Options const &	opts
	)

Distributed parallel reduction to band for 3-stage SVD.

Reduces an n-by-n Hermitian matrix \(A\) to band form using unitary transformations. The factorization has the form

\[ A = Q B Q^H \]

where \(Q\) is unitary and \(B\) is Hermitian band with nb sub and superdiagonals.

Template Parameters

scalar_t One of float, double, std::complex<float>, std::complex<double>.

Parameters

[in,out]	A	On entry, the n-by-n Hermitian matrix \(A\). On exit: [upper is not yet implemented] If A is upper, the elements Aij for j = i, ..., i+nb, represent the Hermitian band matrix B. The elements above the nb-th superdiagonal, along with T, represent the unitary matrix \(Q\) as a product of elementary reflectors. If A is lower, the elements Aij for j = i-nb, ..., i, represent the Hermitian band matrix B. The elements below the nb-th subdiagonal, along with T, represent the unitary matrix \(Q\) as a product of elementary reflectors.
[out]	T	On exit, triangular matrices of the block reflectors for Q.
[in]	opts	Additional options, as map of name = value pairs. Possible options: Option::InnerBlocking: Inner blocking to use for panel. Default 16. Option::MaxPanelThreads: Number of threads to use for panel. Default omp_get_max_threads()/2. Option::Target: Implementation to target. Possible values: HostTask: OpenMP tasks on CPU host [default]. HostNest: not implemented. HostBatch: not implemented. Devices: batched BLAS on GPU device. Note a lookahead is not possible with he2hb due to dependencies from updating on both left and right sides.

◆ hebr1()

template<Target target, typename scalar_t >

void slate::internal::hebr1	(	int64_t	n,
		scalar_t *	v,
		HermitianMatrix< scalar_t > &&	A,
		int	priority
	)

Implements task type 1 in the tridiagonal bulge chasing algorithm.

Dispatches to target implementations.

◆ hebr2()

template<Target target, typename scalar_t >

void slate::internal::hebr2	(	int64_t	n1,
		scalar_t *	v1,
		int64_t	n2,
		scalar_t *	v2,
		Matrix< scalar_t > &&	A,
		int	priority
	)

Implements task type 2 in the tridiagonal bulge chasing algorithm.

Dispatches to target implementations.

◆ hebr3() [1/2]

template<Target target, typename scalar_t >

void slate::internal::hebr3	(	int64_t	n,
		scalar_t *	v,
		HermitianMatrix< scalar_t > &&	A,
		int	priority
	)

Implements task type 3 in the tridiagonal bulge chasing algorithm.

Dispatches to target implementations.

◆ hebr3() [2/2]

template<typename scalar_t >

void slate::internal::hebr3	(	internal::TargetType< Target::HostTask >	,
		int64_t	n,
		scalar_t *	v,
		HermitianMatrix< scalar_t > &	A,
		int	priority
	)

Implements task type 3 in the tridiagonal bulge chasing algorithm, updating a diagonal block with a 2-sided Householder transformation.

Parameters

[in]	n	Length of vector v.
[in]	v	The Householder reflector produced by task type 2.
[in,out]	A	A diagonal block in a sweep.

◆ herf()

template<typename scalar_t >

void slate::internal::herf	(	int64_t	n,
		scalar_t *	v,
		HermitianMatrix< scalar_t > &	A
	)

Applies a Householder reflector \(H = I - \tau v v^H\) to the Hermitian matrix \(A\) on the left and right.

Takes the \(\tau\) factor from \(v[0]\).

Parameters

[in]	n	Length of vector v.
[in]	v	The vector v in the representation of H. Modified but restored on exit.
[in,out]	A	The n-by-n Hermitian matrix A.

◆ stedc()

template<typename real_t >

void slate::stedc	(	std::vector< real_t > &	D,
		std::vector< real_t > &	E,
		Matrix< real_t > &	Q,
		Options const &	opts
	)

Computes all eigenvalues and eigenvectors of a real symmetric tridiagonal matrix in parallel, using the divide and conquer algorithm.

Template Parameters

real_t One of float, double.

Parameters

[in,out]	D	On entry, the diagonal elements of the tridiagonal matrix. On exit, the eigenvalues in ascending order. D is duplicated on all MPI ranks.
[in,out]	E	On entry, the subdiagonal elements of the tridiagonal matrix. On exit, E has been destroyed. E is duplicated on all MPI ranks.
[out]	Q	On exit, Q contains the orthonormal eigenvectors of the symmetric tridiagonal matrix. Code currently requires that Q has a 2D block-cyclic distribution, with column-major grid order, which is the default for a SLATE Matrix.
[in]	opts	Additional options, as map of name = value pairs. Possible options: Option::Target: Implementation to target. Possible values: HostTask: OpenMP tasks on CPU host [default]. HostNest: nested OpenMP parallel for loop on CPU host. HostBatch: batched BLAS on CPU host. Devices: batched BLAS on GPU device.

◆ stedc_deflate()

template<typename real_t >

void slate::stedc_deflate	(	int64_t	n,
		int64_t	n1,
		real_t &	rho,
		real_t *	D,
		real_t *	Dsecular,
		real_t *	z,
		real_t *	zsecular,
		Matrix< real_t > &	Q,
		Matrix< real_t > &	Qtype,
		int64_t *	itype,
		int64_t &	nsecular,
		int64_t &	Qtype12_begin,
		int64_t &	Qtype12_end,
		int64_t &	Qtype23_begin,
		int64_t &	Qtype23_end,
		Options const &	opts
	)

Sorts the two sets of eigenvalues together into a single sorted set, then deflates eigenvalues, which both resolves stability issues (division by zero) and reduces the size of the problem.

There are two ways in which deflation can occur: 1) There is a zero or tiny entry in the z vector, indicating an eigenvalue of the subproblems is already converged to an eigenvalue of the merged problem, so it can be deflated. 2) Two eigenvalues are (nearly) identical. In this case, a Givens rotation is applied to zero an entry of z, and the corresponding eigenvalue can be deflated. For each such occurrence the order of the related secular equation problem is reduced by one.

Corresponds to ScaLAPACK pdlaed2. (Was: indicates ScaLAPACK names.)

Template Parameters

real_t One of float, double.

Parameters

[in]	n	Size of merged system. n = n1 + n2.
[in]	n1	Size of first subproblem, D1. Note that n2, the size of second subproblem, D2, is not passed and is computed as n2 = n - n1.
[in,out]	rho	On entry, the off-diagonal element associated with the rank-1 cut that originally split the two submatrices to be merged. On exit, rho has been scaled to be positive and make \(z\) unit norm, as required by stedc_secular.
[in,out]	D	Real vector of dimension n. On entry, D1 = D[ 0 : n1-1 ] contains eigenvalues of the first subproblem, D2 = D[ n1 : n-1 ] contains eigenvalues of the second subproblem. On exit, deflated eigenvalues are in decreasing order at the local end within a process column (pcol) per the 2D block cyclic distribution of Q. Example on 2 ranks with nb = 4. To easily track values, let D1 be multiples of 3, and D2 be even numbers. Deflate arbitrary values 6, 9, 21, 4, 12 (see ^ marks). Value 6 is type 2 deflation since it is repeated; the rest are type 1 deflation. Note 9, 6, 4 stay on pcol 0, and 21, 12 stay on pcol 1. pcol = [ 0 0 0 0 \| 1 1 1 1 \| 0 0 0 0 \| 1 1 1 1 ] D_in = [ 3 6 9 12 \| 15 18 21 24 \| 2 4 6 8 \| 10 12 14 16 ] [ ^ ^ \| ^ \| ^ \| ^ ] D_out = [ # # # # \| # # # # \| # 9 6 4 \| # # 21 12 ] [ \| \| ^ ^ ^ \| ^ ^ ] Dsecular = [ 2 3 6 8 \| 10 12 14 15 \| 16 18 24 # \| # # # # ] where \| separate blocks, and # values are not set.
[out]	Dsecular	Real vector of dimension n. On exit, Dsecular[ 0 : nsecular-1 ] contains non-deflated eigenvalues, sorted in increasing order. (Was: dlamda)
[in,out]	z	Real vector of dimension n. On entry, z is the updating vector, z = Q^T v = [ Q1^T 0 ] [ e_n1 ], [ 0 Q2^T ] [ e_1 ] which is the last row of Q1 and the first row of Q2. On exit, the contents of z are destroyed.
[out]	zsecular	Real vector of dimension n. On exit, zsecular[ 0 : nsecular-1 ] has non-deflated entries of z, as updated by normalizing and applying Givens rotations in deflation, for use by stedc_secular. (Was: w)
[in,out]	Q	Real n-by-n matrix. On entry, the eigenvectors of the two subproblems, Q = [ Q1 0 ]. [ 0 Q2 ] On exit, eigenvectors associated with type 2 deflation have been modified.
[in,out]	Qtype	Real n-by-n matrix. A copy of all the eigenvectors locally ordered by column type such that Qtype(:, itype) = Q(:, ideflate). Non-deflated eigenvectors will be used by stedc_merge in a matrix multiply (gemm) to solve for the new eigenvectors; deflated eigenvectors will be copied back to Q in the correct place.
[out]	ct_count	Integer npcol-by-5 array. On exit, ct_count( pcol, ctype ) is the number of columns of column type ctype on process column pcol. ctype = 1:4, column ctype = 0 is unused.
[out]	itype	Integer vector of dimension n. On exit, permutation to arrange columns of Qtype locally into 4 column types based on block structure: 1: non-zero in upper half only (rows 0 : n1-1). 2: non-zero in all rows; non-deflated eigenvector from type 2 deflation when one vector is from Q1 and the other vector is from Q2. 3: non-zero in lower half only (rows n1 : n-1). 4: may be non-zero in all rows; deflated eigenvectors. (Was: indx)
[out]	nsecular	On exit, number of non-deflated eigenvalues. (Was: k)
[out]	Qtype12_begin	On exit, index of first column in Qtype sub-matrix spanning column types 1 and 2. Because of local permutation, this may include columns of types 3 and 4. (Was: ib1)
[out]	Qtype12_end	On exit, index of last column + 1 in Qtype sub-matrix spanning column types 1 and 2. (Was: ib1 + nn1)
[out]	Qtype23_begin	On exit, index of first column in Qtype sub-matrix spanning column types 2 and 3. Because of local permutation, this may include columns of types 1 and 4. (Was: ib2)
[out]	Qtype23_end	On exit, index of last column + 1 in Qtype sub-matrix spanning column types 2 and 3. (Was: ib2 + nn2)
[in]	opts	Additional options, as map of name = value pairs. Possible options: Option::Target: Implementation to target. Possible values: HostTask: OpenMP tasks on CPU host [default]. HostNest: nested OpenMP parallel for loop on CPU host. HostBatch: batched BLAS on CPU host. Devices: batched BLAS on GPU device.

◆ stedc_merge()

template<typename real_t >

void slate::stedc_merge	(	int64_t	n,
		int64_t	n1,
		real_t	rho,
		real_t *	D,
		Matrix< real_t > &	Q,
		Matrix< real_t > &	Qtype,
		Matrix< real_t > &	U,
		Options const &	opts
	)

Computes the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix, in parallel.

\[ T = Q_{in} ( D_{in} + \rho Z Z^H ) Q_{in}^H = Q_{out} D_{out} Q_{out}^H \]

where \(z = Q^H v\) and \(v = [ e_{n1} e_1 ]\) is a vector of length \(n\) with ones in the \(n1\) and \(n1 + 1\) elements and zeros elsewhere.

The eigenvectors of the original matrix are stored in Q, and the eigenvalues are in D. The algorithm consists of three stages:

The first stage consists of deflating the size of the problem when there are multiple eigenvalues or if there is a zero in the \(z\) vector. For each such occurence, the dimension of the secular equation problem is reduced by one. This stage is performed by the routine stedc_deflate.

The second stage consists of calculating the updated eigenvalues. This is done by finding the roots of the secular equation via the LAPACK routine laed4, called by stedc_secular. This routine also calculates the eigenvectors of the current problem.

The final stage consists of computing the updated eigenvectors directly using the updated eigenvalues. The eigenvectors for the current problem are multiplied with the eigenvectors from the overall problem.

Corresponds to ScaLAPACK pdlaed1.

Template Parameters

real_t One of float, double.

Parameters

[in]	n	Size of merged system. n = n1 + n2.
[in]	n1	Size of first subproblem, D1. Size of second subproblem, D2, is n2 = n - n1.
[in,out]	rho	On entry, the off-diagonal element associated with the rank-1 cut that originally split the two submatrices to be merged. On exit, updated by deflation process.
[in,out]	D	On entry, the eigenvalues of two subproblems. On exit, the eigenvalues of the merged problem, not sorted.
[in,out]	Q	On entry, Q contains eigenvectors of subproblems. On exit, Q contains the orthonormal eigenvectors of the merged problem.
[out]	Qtype	Qtype is a workspace, the same size as Q.
[out]	U	U is a workspace, the same size as Q.
[in]	opts	Additional options, as map of name = value pairs. Possible options: Option::Target: Implementation to target. Possible values: HostTask: OpenMP tasks on CPU host [default]. HostNest: nested OpenMP parallel for loop on CPU host. HostBatch: batched BLAS on CPU host. Devices: batched BLAS on GPU device.

◆ stedc_secular()

template<typename real_t >

void slate::stedc_secular	(	int64_t	nsecular,
		int64_t	n,
		real_t	rho,
		real_t *	D,
		real_t *	z,
		real_t *	Lambda,
		Matrix< real_t > &	U,
		int64_t *	itype,
		Options const &	opts
	)

Finds the nsecular roots of the secular equation, as defined by the values in rho, D, z, and computes the corresponding eigenvectors in U.

Corresponds to ScaLAPACK pdlaed3.

Template Parameters

real_t One of float, double.

Parameters

[in]	nsecular	The number of non-deflated eigenvalues, and the order of the related secular equation. 0 <= nsecular <= n.
[in]	n	The number of rows and columns in the U matrix. n >= nsecular (deflation may result in n > nsecular).
[in]	rho	The off-diagonal element associated with the rank-1 cut that originally split the two submatrices to be merged, as modified by stedc_deflate to be positive and make \(z\) unit norm.
[in]	D	The nsecular non-deflated eigenvalues of the two sub-problems, as output by stedc_deflate in Dsecular, which will be used by laed4 to form the secular equation.
[in]	z	The nsecular entries of the deflation-adjusted z vector, as output by stedc_deflate in zsecular.
[in,out]	Lambda	On entry, Lambda contains the (n - nsecular) deflated eigenvalues, as output by stedc_deflate in D. On exit, Lambda contains all n eigenvalues, permuted by itype.
[out]	U	On exit, U contains the orthonormal eigenvectors from the secular equation, permuted by itype.
[in]	itype	Permutation that arranged Qtype locally into 4 groups by column type, Qtype = [ Q11 Q12 Q14 ], [ Q22 Q23 Q24 ] as output by stedc_deflate. Used to permute Lambda and U to match Qtype.
[in]	opts	Additional options, as map of name = value pairs. Possible options: Option::Target: Implementation to target. Possible values: HostTask: OpenMP tasks on CPU host [default]. HostNest: nested OpenMP parallel for loop on CPU host. HostBatch: batched BLAS on CPU host. Devices: batched BLAS on GPU device.

◆ stedc_solve()

template<typename real_t >

void slate::stedc_solve	(	std::vector< real_t > &	D,
		std::vector< real_t > &	E,
		Matrix< real_t > &	Q,
		Matrix< real_t > &	W,
		Matrix< real_t > &	U,
		Options const &	opts
	)

Computes all eigenvalues and eigenvectors of a symmetric tridiagonal matrix in parallel, using the divide and conquer algorithm.

Corresponds to ScaLAPACK pdlaed0.

Template Parameters

real_t One of float, double.

Parameters

[in,out]	D	On entry, the diagonal elements of the tridiagonal matrix. On exit, the eigenvalues, not sorted.
[in,out]	E	On entry, the subdiagonal elements of the tridiagonal matrix. On exit, E has been destroyed.
[in,out]	Q	On entry, Q is the Identity. On exit, Q contains the orthonormal eigenvectors of the symmetric tridiagonal matrix.
[out]	W	W is a workspace, the same size as Q.
[out]	U	U is a workspace, the same size as Q.
[in]	opts	Additional options, as map of name = value pairs. Possible options: Option::Target: Implementation to target. Possible values: HostTask: OpenMP tasks on CPU host [default]. HostNest: nested OpenMP parallel for loop on CPU host. HostBatch: batched BLAS on CPU host. Devices: batched BLAS on GPU device.

◆ stedc_z_vector()

template<typename real_t >

void slate::stedc_z_vector	(	Matrix< real_t > &	Q,
		std::vector< real_t > &	z,
		Options const &	opts
	)

Communicates the z vector to all ranks.

Corresponds to ScaLAPACK pdlaedz.

Template Parameters

real_t One of float, double.

Parameters

[in]	Q	On entry, matrix of eigenvectors for 2 sub-problems being merged, Q = [ Q1 0 ]. [ 0 Q2 ] If Q is nt-by-nt tiles, then: Q1 is nt1-by-nt1 tiles with nt1 = floor( nt / 2 ), Q2 is nt2-by-nt2 tiles with nt2 = ceil( nt / 2 ).
[out]	z	On exit, z vector in divide-and-conquer algorithm, consisting of the last row of Q1 and the first row of Q2: z = Q^T [ e_{n_1} ] = [ Q1^T e_{n_1} ]. [ e_1 ] [ Q2^T e_1 ] z is duplicated on all MPI ranks.
[in]	opts	Additional options, as map of name = value pairs. Possible options: Currently none

◆ steqr2()

template<typename scalar_t >

void slate::steqr2	(	Job	jobz,
		std::vector< blas::real_type< scalar_t > > &	D,
		std::vector< blas::real_type< scalar_t > > &	E,
		Matrix< scalar_t > &	Z,
		Options const &	opts
	)

Computes all eigenvalues/eigenvectors of a symmetric tridiagonal matrix using the Pal-Walker-Kahan variant of the QL or QR algorithm.

ATTENTION: only host computation supported for now

◆ sterf()

template<typename scalar_t >

void slate::sterf	(	std::vector< scalar_t > &	D,
		std::vector< scalar_t > &	E,
		Options const &	opts
	)

Computes all eigenvalues of a symmetric tridiagonal matrix using the Pal-Walker-Kahan variant of the QL or QR algorithm.

ATTENTION: only host computation supported for now

◆ unmtr_hb2st()

template<typename scalar_t >

void slate::unmtr_hb2st	(	Side	side,
		Op	op,
		Matrix< scalar_t > &	V,
		Matrix< scalar_t > &	C,
		const std::map< Option, Value > &	opts
	)

Multiplies the general m-by-n matrix C by Q from slate::hb2st as follows:

op	side = Left	side = Right (not supported)
op = NoTrans	\(Q C \)	\(C Q \)
op = ConjTrans	\(Q^H C\)	\(C Q^H\)

where \(Q\) is a unitary matrix defined as the product of k elementary reflectors

\[ Q = H(1) H(2) . . . H(k) \]

Template Parameters

scalar_t One of float, double, std::complex<float>, std::complex<double>.

Parameters

[in]	side	Side::Left: apply \(Q\) or \(Q^H\) from the left; Side::Right: apply \(Q\) or \(Q^H\) from the right (not supported).
[in]	op	Op::NoTrans apply \(Q\); Op::ConjTrans: apply \(Q^H\); Op::Trans: apply \(Q^T\) (only if real). In the real case, Op::Trans is equivalent to Op::ConjTrans. In the complex case, Op::Trans is not allowed.
[in]	V	Householder vectors as returned by `slate::hb2st`.
[in,out]	C	On entry, the m-by-n matrix \(C\). On exit, \(C\) is overwritten by \(Q C\), \(Q^H C\), \(C Q\), or \(C Q^H\). C must be distributed 1D block column (cyclic or non-cyclic).
[in]	opts	Additional options, as map of name = value pairs. Possible options: Option::Target: Implementation to target. Possible values: HostTask: OpenMP tasks on CPU host [default]. HostNest: nested OpenMP parallel for loop on CPU host. HostBatch: batched BLAS on CPU host. Devices: batched BLAS on GPU device.

◆ unmtr_he2hb()

template<typename scalar_t >

void slate::unmtr_he2hb	(	Side	side,
		Op	op,
		HermitianMatrix< scalar_t > &	A,
		TriangularFactors< scalar_t >	T,
		Matrix< scalar_t > &	C,
		Options const &	opts
	)

Multiplies the general m-by-n matrix C by Q from slate::he2hb as follows:

op	side = Left	side = Right
op = NoTrans	\(Q C \)	\(C Q \)
op = ConjTrans	\(Q^H C\)	\(C Q^H\)

where \(Q\) is a unitary matrix defined as the product of k elementary reflectors

\[ Q = H(1) H(2) . . . H(k) \]

Template Parameters

scalar_t One of float, double, std::complex<float>, std::complex<double>.

Parameters

[in]	side	Side::Left: apply \(Q\) or \(Q^H\) from the left; Side::Right: apply \(Q\) or \(Q^H\) from the right.
[in]	op	Op::NoTrans apply \(Q\); Op::ConjTrans: apply \(Q^H\); Op::Trans: apply \(Q^T\) (only if real). In the real case, Op::Trans is equivalent to Op::ConjTrans. In the complex case, Op::Trans is not allowed.
[in]	A	On entry, the n-by-n Hermitian matrix \(A\), as returned by `slate::he2hb`.
[in]	T	On entry, triangular matrices of the elementary reflector H(i), as returned by `slate::he2hb`.
[in,out]	C	On entry, the m-by-n matrix \(C\). On exit, \(C\) is overwritten by \(Q C\), \(Q^H C\), \(C Q\), or \(C Q^H\).
[in]	opts	Additional options, as map of name = value pairs. Possible options: Option::Target: Implementation to target. Possible values: HostTask: OpenMP tasks on CPU host [default]. HostNest: nested OpenMP parallel for loop on CPU host. HostBatch: batched BLAS on CPU host. Devices: batched BLAS on GPU device.

Functions

Detailed Description

Function Documentation

◆ hb2st()

◆ he2hb()

◆ hebr1()

◆ hebr2()

◆ hebr3() [1/2]

◆ hebr3() [2/2]

◆ herf()

◆ stedc()

◆ stedc_deflate()

◆ stedc_merge()

◆ stedc_secular()

◆ stedc_solve()

◆ stedc_z_vector()

◆ steqr2()

◆ sterf()

◆ unmtr_hb2st()

◆ unmtr_he2hb()