SLATE 2024.05.31
Software for Linear Algebra Targeting Exascale
|
Namespace used for target implementations. More...
Typedefs | |
using | ProgressVector = std::vector< std::atomic< int64_t > > |
using | Progress = std::vector< std::atomic< int64_t > > |
Functions | |
template<Target target, typename scalar_t > | |
void | add (scalar_t alpha, Matrix< scalar_t > &A, scalar_t beta, Matrix< scalar_t > &B, Options const &opts) |
template<Target target, typename scalar_t > | |
void | add (scalar_t alpha, BaseTrapezoidMatrix< scalar_t > &A, scalar_t beta, BaseTrapezoidMatrix< scalar_t > &B, Options const &opts) |
template<Target target, typename scalar_t > | |
void | cholqr (Matrix< scalar_t > &A, Matrix< scalar_t > &R, Options const &opts) |
template<Target target, typename scalar_t > | |
void | cholqr (Matrix< scalar_t > &A, HermitianMatrix< scalar_t > &R, Options const &opts) |
template<Target target, typename matrix_type > | |
void | colNorms (Norm in_norm, matrix_type A, blas::real_type< typename matrix_type::value_type > *values, Options const &opts) |
template<Target target, typename src_matrix_type , typename dst_matrix_type > | |
void | copy (src_matrix_type A, dst_matrix_type B, Options const &opts) |
template<Target target, typename scalar_t > | |
void | gbmm (scalar_t alpha, BandMatrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, Options const &opts) |
template<Target target, typename scalar_t > | |
int64_t | gbtrf (BandMatrix< scalar_t > &A, Pivots &pivots, Options const &opts) |
Distributed parallel band LU factorization. | |
template<Target target, typename scalar_t > | |
void | ge2tb (Matrix< scalar_t > &A, TriangularFactors< scalar_t > &TU, TriangularFactors< scalar_t > &TV, Options const &opts) |
Distributed parallel reduction to band for 3-stage SVD. | |
template<Target target, typename scalar_t > | |
void | gelqf (Matrix< scalar_t > &A, TriangularFactors< scalar_t > &T, Options const &opts) |
Distributed parallel LQ factorization. | |
template<Target target, typename scalar_t > | |
void | gemmA (scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, Options const &opts) |
template<Target target, typename scalar_t > | |
void | gemmC (scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, Options const &opts) |
template<Target target, typename scalar_t > | |
void | geqrf (Matrix< scalar_t > &A, TriangularFactors< scalar_t > &T, Options const &opts) |
Distributed parallel QR factorization. | |
template<Target target, typename scalar_t > | |
int64_t | getrf (Matrix< scalar_t > &A, Pivots &pivots, Options const &opts) |
Distributed parallel LU factorization. | |
template<Target target, typename scalar_t > | |
int64_t | getrf_nopiv (Matrix< scalar_t > &A, Options const &opts) |
Distributed parallel LU factorization without pivoting. | |
template<Target target, typename scalar_t > | |
int64_t | getrf_tntpiv (Matrix< scalar_t > &A, Pivots &pivots, Options const &opts) |
Distributed parallel CALU factorization. | |
template<Target target, typename scalar_t > | |
void | getri (Matrix< scalar_t > &A, Pivots &pivots, Options const &opts) |
Distributed parallel inverse of a general matrix. | |
template<typename scalar_t > | |
void | hb2st_step (HermitianBandMatrix< scalar_t > &A, Matrix< scalar_t > &V, int64_t sweep, int64_t step) |
template<typename scalar_t > | |
void | hb2st_run (HermitianBandMatrix< scalar_t > &A, Matrix< scalar_t > &V, int thread_rank, int thread_size, ProgressVector &progress) |
template<Target target, typename scalar_t > | |
void | hb2st (HermitianBandMatrix< scalar_t > &A, Matrix< scalar_t > &V, Options const &opts) |
template<Target target, typename scalar_t > | |
void | hbmm (Side side, scalar_t alpha, HermitianBandMatrix< scalar_t > A, Matrix< scalar_t > B, scalar_t beta, Matrix< scalar_t > C, Options const &opts) |
template<Target target, typename scalar_t > | |
void | he2hb (HermitianMatrix< scalar_t > &A, TriangularFactors< scalar_t > &T, Options const &opts) |
Distributed parallel reduction to band for 3-stage Hermitian eigenvalue decomposition. | |
template<Target target, typename scalar_t > | |
void | hegst (int64_t itype, HermitianMatrix< scalar_t > A, HermitianMatrix< scalar_t > B, Options const &opts) |
Distributed parallel reduction of a complex Hermitian positive-definite generalized eigenvalue problem to the standard form. | |
template<Target target, typename scalar_t > | |
void | hemmA (Side side, scalar_t alpha, HermitianMatrix< scalar_t > A, Matrix< scalar_t > B, scalar_t beta, Matrix< scalar_t > C, Options const &opts) |
template<Target target, typename scalar_t > | |
void | hemmC (Side side, scalar_t alpha, HermitianMatrix< scalar_t > A, Matrix< scalar_t > B, scalar_t beta, Matrix< scalar_t > C, Options const &opts) |
template<Target target, typename scalar_t > | |
void | her2k (scalar_t alpha, Matrix< scalar_t > A, Matrix< scalar_t > B, blas::real_type< scalar_t > beta, HermitianMatrix< scalar_t > C, Options const &opts) |
template<Target target, typename scalar_t > | |
void | herk (blas::real_type< scalar_t > alpha, Matrix< scalar_t > A, blas::real_type< scalar_t > beta, HermitianMatrix< scalar_t > C, Options const &opts) |
template<Target target, typename scalar_t > | |
int64_t | hetrf (HermitianMatrix< scalar_t > &A, Pivots &pivots, BandMatrix< scalar_t > &T, Pivots &pivots2, Matrix< scalar_t > &H, Options const &opts) |
Distributed parallel Hermitian indefinite \(LTL^T\) factorization. | |
template<Target target, typename matrix_type > | |
blas::real_type< typename matrix_type::value_type > | norm (Norm in_norm, matrix_type A, Options const &opts) |
template<Target target, typename scalar_t > | |
int64_t | pbtrf (HermitianBandMatrix< scalar_t > A, Options const &opts) |
Distributed parallel band Cholesky factorization. | |
template<Target target, typename scalar_t > | |
int64_t | potrf (slate::internal::TargetType< target >, HermitianMatrix< scalar_t > A, Options const &opts) |
Distributed parallel Cholesky factorization. | |
template<Target target, typename scalar_t > | |
void | scale (blas::real_type< scalar_t > numer, blas::real_type< scalar_t > denom, Matrix< scalar_t > &A, Options const &opts) |
template<Target target, typename scalar_t > | |
void | scale (blas::real_type< scalar_t > numer, blas::real_type< scalar_t > denom, BaseTrapezoidMatrix< scalar_t > &A, Options const &opts) |
template<Target target, typename scalar_t , typename scalar_t2 > | |
void | scale_row_col (Equed equed, std::vector< scalar_t2 > const &R, std::vector< scalar_t2 > const &C, Matrix< scalar_t > &A, Options const &opts) |
template<Target target, typename scalar_t > | |
void | set (scalar_t offdiag_value, scalar_t diag_value, Matrix< scalar_t > &A, Options const &opts) |
template<Target target, typename scalar_t > | |
void | set (scalar_t offdiag_value, scalar_t diag_value, BaseTrapezoidMatrix< scalar_t > &A, Options const &opts) |
template<Target target, typename scalar_t > | |
void | symm (Side side, scalar_t alpha, SymmetricMatrix< scalar_t > A, Matrix< scalar_t > B, scalar_t beta, Matrix< scalar_t > C, Options const &opts) |
template<Target target, typename scalar_t > | |
void | syr2k (scalar_t alpha, Matrix< scalar_t > A, Matrix< scalar_t > B, scalar_t beta, SymmetricMatrix< scalar_t > C, Options const &opts) |
template<Target target, typename scalar_t > | |
void | syrk (scalar_t alpha, Matrix< scalar_t > A, scalar_t beta, SymmetricMatrix< scalar_t > C, Options const &opts) |
template<typename scalar_t > | |
void | tb2bd_step (TriangularBandMatrix< scalar_t > &A, Matrix< scalar_t > &U, Matrix< scalar_t > &V, int64_t band, int64_t sweep, int64_t step) |
template<typename scalar_t > | |
void | tb2bd_run (TriangularBandMatrix< scalar_t > &A, Matrix< scalar_t > &U, Matrix< scalar_t > &V, int64_t band, int64_t diag_len, int64_t pass_size, int thread_rank, int thread_size, Progress &progress) |
template<Target target, typename scalar_t > | |
void | tb2bd (TriangularBandMatrix< scalar_t > &A, Matrix< scalar_t > &U, Matrix< scalar_t > &V, Options const &opts) |
template<Target target, typename scalar_t > | |
void | tbsm (Side side, scalar_t alpha, TriangularBandMatrix< scalar_t > A, Pivots &pivots, Matrix< scalar_t > B, Options const &opts) |
template<Target target, typename scalar_t > | |
void | trmm (Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, Options const &opts) |
template<Target target, typename scalar_t > | |
void | trsmA (Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, Options const &opts) |
template<Target target, typename scalar_t > | |
void | trsmB (Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, Options const &opts) |
template<Target target, typename scalar_t > | |
void | trtri (TriangularMatrix< scalar_t > A, Options const &opts) |
Distributed parallel inverse of a triangular matrix. | |
template<Target target, typename scalar_t > | |
void | trtrm (TriangularMatrix< scalar_t > A, Options const &opts) |
todo: update docs: multiply not inverse. | |
template<Target target, typename scalar_t > | |
void | unmlq (Side side, Op op, Matrix< scalar_t > &A, TriangularFactors< scalar_t > &T, Matrix< scalar_t > &C, Options const &opts) |
Distributed parallel multiply by Q from LQ factorization. | |
template<Target target, typename scalar_t > | |
void | unmqr (Side side, Op op, Matrix< scalar_t > &A, TriangularFactors< scalar_t > &T, Matrix< scalar_t > &C, Options const &opts) |
Distributed parallel multiply by Q from QR factorization. | |
template<Target target, typename scalar_t > | |
void | unmtr_hb2st (Side side, Op op, Matrix< scalar_t > &V, Matrix< scalar_t > &C, const std::map< Option, Value > &opts) |
Distributed parallel unmtr_hb2st. | |
Namespace used for target implementations.
This differentiates, for example, between target implementations such as impl::gemm&lt;Target::HostTask&gt;( ... ) and impl::gemm&lt;Target::Device&gt;( ... ), selected at compile time via the Target template parameter.
void slate::impl::add | ( | scalar_t | alpha, |
BaseTrapezoidMatrix< scalar_t > & | A, | ||
scalar_t | beta, | ||
BaseTrapezoidMatrix< scalar_t > & | B, | ||
Options const & | opts | ||
) |
Distributed parallel matrix-matrix addition. Generic implementation for any target.
void slate::impl::add | ( | scalar_t | alpha, |
Matrix< scalar_t > & | A, | ||
scalar_t | beta, | ||
Matrix< scalar_t > & | B, | ||
Options const & | opts | ||
) |
Distributed parallel general matrix-matrix addition. Generic implementation for any target.
void slate::impl::colNorms | ( | Norm | in_norm, |
matrix_type | A, | ||
blas::real_type< typename matrix_type::value_type > * | values, | ||
Options const & | opts | ||
) |
Distributed parallel computation of matrix column norms. Generic implementation for any target.
void slate::impl::copy | ( | src_matrix_type | A, |
dst_matrix_type | B, | ||
Options const & | opts | ||
) |
Copy and precision conversion. Generic implementation for any target.
void slate::impl::gbmm | ( | scalar_t | alpha, |
BandMatrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
scalar_t | beta, | ||
Matrix< scalar_t > & | C, | ||
Options const & | opts | ||
) |
Distributed parallel general band matrix-matrix multiplication. Generic implementation for any target. Dependencies enforce the following behavior:
int64_t slate::impl::gbtrf | ( | BandMatrix< scalar_t > & | A, |
Pivots & | pivots, | ||
Options const & | opts | ||
) |
Distributed parallel band LU factorization.
Generic implementation for any target. Panel and lookahead computed on host using Host OpenMP task.
Warning: ColMajor layout is assumed
void slate::impl::ge2tb | ( | Matrix< scalar_t > & | A, |
TriangularFactors< scalar_t > & | TU, | ||
TriangularFactors< scalar_t > & | TV, | ||
Options const & | opts | ||
) |
Distributed parallel reduction to band for 3-stage SVD.
Generic implementation for any target. Panel computed on host using Host OpenMP task.
ColMajor layout is assumed
void slate::impl::gelqf | ( | Matrix< scalar_t > & | A, |
TriangularFactors< scalar_t > & | T, | ||
Options const & | opts | ||
) |
Distributed parallel LQ factorization.
Generic implementation for any target. Panel and lookahead computed on host using Host OpenMP task.
ColMajor layout is assumed
void slate::impl::gemmC | ( | scalar_t | alpha, |
Matrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
scalar_t | beta, | ||
Matrix< scalar_t > & | C, | ||
Options const & | opts | ||
) |
Distributed parallel general matrix-matrix multiplication. Generic implementation for any target. Dependencies enforce the following behavior:
void slate::impl::geqrf | ( | Matrix< scalar_t > & | A, |
TriangularFactors< scalar_t > & | T, | ||
Options const & | opts | ||
) |
Distributed parallel QR factorization.
Generic implementation for any target. Panel and lookahead computed on host using Host OpenMP task.
ColMajor layout is assumed
void slate::impl::hb2st_run | ( | HermitianBandMatrix< scalar_t > & | A, |
Matrix< scalar_t > & | V, | ||
int | thread_rank, | ||
int | thread_size, | ||
ProgressVector & | progress | ||
) |
Implements multi-threaded tridiagonal bulge chasing. This is the main routine that each thread runs.
[in,out] | A | The band Hermitian matrix A. |
[out] | V | Matrix of Householder reflectors produced in the process. Dimension 2*band-by-XYZ todo |
[in] | thread_rank | rank of this thread |
[in] | thread_size | number of threads |
[in] | progress | progress table for synchronizing threads |
(Note: the implementation contains a debug trace of the form `printf( "tid %d pass %lld, task %lld, %lld\n", ... )` reporting the thread id, pass, and task indices.)
void slate::impl::hb2st_step | ( | HermitianBandMatrix< scalar_t > & | A, |
Matrix< scalar_t > & | V, | ||
int64_t | sweep, | ||
int64_t | step | ||
) |
Implements the tasks of tridiagonal bulge chasing.
[in,out] | A | The band Hermitian matrix A. |
[out] | V | Matrix of Householder reflectors produced in the process. Dimension 2*band-by-... todo. |
[in] | sweep | The sweep number. One sweep eliminates one row and sweeps the entire matrix. |
[in] | step | The step number. Steps in each sweep have consecutive numbers. |
void slate::impl::hbmm | ( | Side | side, |
scalar_t | alpha, | ||
HermitianBandMatrix< scalar_t > | A, | ||
Matrix< scalar_t > | B, | ||
scalar_t | beta, | ||
Matrix< scalar_t > | C, | ||
Options const & | opts | ||
) |
Distributed parallel Hermitian banded matrix-matrix multiplication. Generic implementation for any target. Dependencies enforce the following behavior:
ColMajor layout is assumed
void slate::impl::hegst | ( | int64_t | itype, |
HermitianMatrix< scalar_t > | A, | ||
HermitianMatrix< scalar_t > | B, | ||
Options const & | opts | ||
) |
Distributed parallel reduction of a complex Hermitian positive-definite generalized eigenvalue problem to the standard form.
Generic implementation for any target.
void slate::impl::her2k | ( | scalar_t | alpha, |
Matrix< scalar_t > | A, | ||
Matrix< scalar_t > | B, | ||
blas::real_type< scalar_t > | beta, | ||
HermitianMatrix< scalar_t > | C, | ||
Options const & | opts | ||
) |
Distributed parallel Hermitian rank 2k update. Generic implementation for any target. Dependencies enforce the following behavior:
int64_t slate::impl::hetrf | ( | HermitianMatrix< scalar_t > & | A, |
Pivots & | pivots, | ||
BandMatrix< scalar_t > & | T, | ||
Pivots & | pivots2, | ||
Matrix< scalar_t > & | H, | ||
Options const & | opts | ||
) |
Distributed parallel Hermitian indefinite \(LTL^T\) factorization.
Generic implementation for any target. GPU version not yet implemented.
blas::real_type< typename matrix_type::value_type > slate::impl::norm | ( | Norm | in_norm, |
matrix_type | A, | ||
Options const & | opts | ||
) |
Distributed parallel general matrix norm. Generic implementation for any target.
int64_t slate::impl::pbtrf | ( | HermitianBandMatrix< scalar_t > | A, |
Options const & | opts | ||
) |
Distributed parallel band Cholesky factorization.
Generic implementation for any target. Panel and lookahead computed on host using Host OpenMP task.
Warning: ColMajor layout is assumed
int64_t slate::impl::potrf | ( | slate::internal::TargetType< target > | , |
HermitianMatrix< scalar_t > | A, | ||
Options const & | opts | ||
) |
Distributed parallel Cholesky factorization.
Generic implementation for any target.
void slate::impl::scale | ( | blas::real_type< scalar_t > | numer, |
blas::real_type< scalar_t > | denom, | ||
BaseTrapezoidMatrix< scalar_t > & | A, | ||
Options const & | opts | ||
) |
Scale matrix entries by the real scalar numer/denom. Generic implementation for any target.
void slate::impl::scale | ( | blas::real_type< scalar_t > | numer, |
blas::real_type< scalar_t > | denom, | ||
Matrix< scalar_t > & | A, | ||
Options const & | opts | ||
) |
Scale matrix entries by the real scalar numer/denom. Generic implementation for any target.
void slate::impl::scale_row_col | ( | Equed | equed, |
std::vector< scalar_t2 > const & | R, | ||
std::vector< scalar_t2 > const & | C, | ||
Matrix< scalar_t > & | A, | ||
Options const & | opts | ||
) |
Apply row or column scaling, or both, to a Matrix. Generic implementation for any target.
void slate::impl::set | ( | scalar_t | offdiag_value, |
scalar_t | diag_value, | ||
BaseTrapezoidMatrix< scalar_t > & | A, | ||
Options const & | opts | ||
) |
Set matrix entries. Generic implementation for any target.
void slate::impl::set | ( | scalar_t | offdiag_value, |
scalar_t | diag_value, | ||
Matrix< scalar_t > & | A, | ||
Options const & | opts | ||
) |
Set matrix entries. Generic implementation for any target.
void slate::impl::syr2k | ( | scalar_t | alpha, |
Matrix< scalar_t > | A, | ||
Matrix< scalar_t > | B, | ||
scalar_t | beta, | ||
SymmetricMatrix< scalar_t > | C, | ||
Options const & | opts | ||
) |
Distributed parallel symmetric rank 2k update. Generic implementation for any target. Dependencies enforce the following behavior:
void slate::impl::syrk | ( | scalar_t | alpha, |
Matrix< scalar_t > | A, | ||
scalar_t | beta, | ||
SymmetricMatrix< scalar_t > | C, | ||
Options const & | opts | ||
) |
Distributed parallel symmetric rank k update. Generic implementation for any target. Dependencies enforce the following behavior:
void slate::impl::tb2bd | ( | TriangularBandMatrix< scalar_t > & | A, |
Matrix< scalar_t > & | U, | ||
Matrix< scalar_t > & | V, | ||
Options const & | opts | ||
) |
Reduces a band matrix to a bidiagonal matrix using bulge chasing.
void slate::impl::tb2bd_run | ( | TriangularBandMatrix< scalar_t > & | A, |
Matrix< scalar_t > & | U, | ||
Matrix< scalar_t > & | V, | ||
int64_t | band, | ||
int64_t | diag_len, | ||
int64_t | pass_size, | ||
int | thread_rank, | ||
int | thread_size, | ||
Progress & | progress | ||
) |
Implements multi-threaded bidiagonal bulge chasing.
[in,out] | A | The band matrix A. |
[in] | band | The bandwidth of matrix A. |
[in] | diag_len | The length of the diagonal. |
[in] | pass_size | The number of rows eliminated at a time. |
[in] | thread_rank | rank of this thread |
[in] | thread_size | number of threads |
[in] | progress | progress table for synchronizing threads |
void slate::impl::tb2bd_step | ( | TriangularBandMatrix< scalar_t > & | A, |
Matrix< scalar_t > & | U, | ||
Matrix< scalar_t > & | V, | ||
int64_t | band, | ||
int64_t | sweep, | ||
int64_t | step | ||
) |
Implements the tasks of bidiagonal bulge chasing.
[in,out] | A | The band matrix A. |
[out] | U | Matrix to store the Householder vectors applied to the left of the band matrix A. U is 2*nb-by-nt*(nt + 1)/2*nb, where nb is the tile size (A.tileNb(0)) and nt is the number of A tiles (A.nt()). The U matrix needs to be allocated on MPI rank 0, where the band matrix A resides. |
[out] | V | Matrix to store the Householder vectors applied to the right of the band matrix A. V is 2*nb-by-nt*(nt + 1)/2*nb, where nb is the tile size (A.tileNb(0)) and nt is the number of A tiles (A.nt()). The V matrix needs to be allocated on MPI rank 0, where the band matrix A resides. |
[in] | band | The bandwidth of matrix A. |
[in] | sweep | The sweep number. One sweep eliminates one row and sweeps the entire matrix. |
[in] | step | The step number. Steps in each sweep have consecutive numbers. |
void slate::impl::tbsm | ( | Side | side, |
scalar_t | alpha, | ||
TriangularBandMatrix< scalar_t > | A, | ||
Pivots & | pivots, | ||
Matrix< scalar_t > | B, | ||
Options const & | opts | ||
) |
Distributed parallel triangular matrix solve. Generic implementation for any target. Note A and B are passed by value, so we can transpose if needed (for side = right) without affecting caller.
void slate::impl::trmm | ( | Side | side, |
scalar_t | alpha, | ||
TriangularMatrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
Options const & | opts | ||
) |
Distributed parallel triangular matrix-matrix multiplication. Generic implementation for any target.
void slate::impl::trsmB | ( | Side | side, |
scalar_t | alpha, | ||
TriangularMatrix< scalar_t > & | A, | ||
Matrix< scalar_t > & | B, | ||
Options const & | opts | ||
) |
Distributed parallel triangular matrix solve. Generic implementation for any target.
void slate::impl::trtri | ( | TriangularMatrix< scalar_t > | A, |
Options const & | opts | ||
) |
Distributed parallel inverse of a triangular matrix.
Generic implementation for any target. Panel and lookahead computed on host using Host OpenMP task.
void slate::impl::trtrm | ( | TriangularMatrix< scalar_t > | A, |
Options const & | opts | ||
) |
todo: update docs: multiply not inverse.
Distributed parallel triangular-triangular matrix multiply (computes the in-place product of a triangular matrix with its conjugate transpose, as in LAPACK's lauum; not the inverse — see todo above). Generic implementation for any target.
void slate::impl::unmlq | ( | Side | side, |
Op | op, | ||
Matrix< scalar_t > & | A, | ||
TriangularFactors< scalar_t > & | T, | ||
Matrix< scalar_t > & | C, | ||
Options const & | opts | ||
) |
Distributed parallel multiply by Q from LQ factorization.
Generic implementation for any target.