SLATE 2024.05.31
Software for Linear Algebra Targeting Exascale
slate::impl Namespace Reference

Namespace used for target implementations. More...

Typedefs

using ProgressVector = std::vector< std::atomic< int64_t > >
 
using Progress = std::vector< std::atomic< int64_t > >
 

Functions

template<Target target, typename scalar_t >
void add (scalar_t alpha, Matrix< scalar_t > &A, scalar_t beta, Matrix< scalar_t > &B, Options const &opts)
 
template<Target target, typename scalar_t >
void add (scalar_t alpha, BaseTrapezoidMatrix< scalar_t > &A, scalar_t beta, BaseTrapezoidMatrix< scalar_t > &B, Options const &opts)
 
template<Target target, typename scalar_t >
void cholqr (Matrix< scalar_t > &A, Matrix< scalar_t > &R, Options const &opts)
 
template<Target target, typename scalar_t >
void cholqr (Matrix< scalar_t > &A, HermitianMatrix< scalar_t > &R, Options const &opts)
 
template<Target target, typename matrix_type >
void colNorms (Norm in_norm, matrix_type A, blas::real_type< typename matrix_type::value_type > *values, Options const &opts)
 
template<Target target, typename src_matrix_type , typename dst_matrix_type >
void copy (src_matrix_type A, dst_matrix_type B, Options const &opts)
 
template<Target target, typename scalar_t >
void gbmm (scalar_t alpha, BandMatrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, Options const &opts)
 
template<Target target, typename scalar_t >
int64_t gbtrf (BandMatrix< scalar_t > &A, Pivots &pivots, Options const &opts)
 Distributed parallel band LU factorization.
 
template<Target target, typename scalar_t >
void ge2tb (Matrix< scalar_t > &A, TriangularFactors< scalar_t > &TU, TriangularFactors< scalar_t > &TV, Options const &opts)
 Distributed parallel reduction to band for 3-stage SVD.
 
template<Target target, typename scalar_t >
void gelqf (Matrix< scalar_t > &A, TriangularFactors< scalar_t > &T, Options const &opts)
 Distributed parallel LQ factorization.
 
template<Target target, typename scalar_t >
void gemmA (scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, Options const &opts)
 
template<Target target, typename scalar_t >
void gemmC (scalar_t alpha, Matrix< scalar_t > &A, Matrix< scalar_t > &B, scalar_t beta, Matrix< scalar_t > &C, Options const &opts)
 
template<Target target, typename scalar_t >
void geqrf (Matrix< scalar_t > &A, TriangularFactors< scalar_t > &T, Options const &opts)
 Distributed parallel QR factorization.
 
template<Target target, typename scalar_t >
int64_t getrf (Matrix< scalar_t > &A, Pivots &pivots, Options const &opts)
 Distributed parallel LU factorization.
 
template<Target target, typename scalar_t >
int64_t getrf_nopiv (Matrix< scalar_t > &A, Options const &opts)
 Distributed parallel LU factorization without pivoting.
 
template<Target target, typename scalar_t >
int64_t getrf_tntpiv (Matrix< scalar_t > &A, Pivots &pivots, Options const &opts)
 Distributed parallel CALU factorization.
 
template<Target target, typename scalar_t >
void getri (Matrix< scalar_t > &A, Pivots &pivots, Options const &opts)
 Distributed parallel inverse of a general matrix.
 
template<typename scalar_t >
void hb2st_step (HermitianBandMatrix< scalar_t > &A, Matrix< scalar_t > &V, int64_t sweep, int64_t step)
 
template<typename scalar_t >
void hb2st_run (HermitianBandMatrix< scalar_t > &A, Matrix< scalar_t > &V, int thread_rank, int thread_size, ProgressVector &progress)
 
template<Target target, typename scalar_t >
void hb2st (HermitianBandMatrix< scalar_t > &A, Matrix< scalar_t > &V, Options const &opts)
 
template<Target target, typename scalar_t >
void hbmm (Side side, scalar_t alpha, HermitianBandMatrix< scalar_t > A, Matrix< scalar_t > B, scalar_t beta, Matrix< scalar_t > C, Options const &opts)
 
template<Target target, typename scalar_t >
void he2hb (HermitianMatrix< scalar_t > &A, TriangularFactors< scalar_t > &T, Options const &opts)
 Distributed parallel reduction to band for 3-stage Hermitian eigenvalue decomposition.
 
template<Target target, typename scalar_t >
void hegst (int64_t itype, HermitianMatrix< scalar_t > A, HermitianMatrix< scalar_t > B, Options const &opts)
 Distributed parallel reduction of a complex Hermitian positive-definite generalized eigenvalue problem to the standard form.
 
template<Target target, typename scalar_t >
void hemmA (Side side, scalar_t alpha, HermitianMatrix< scalar_t > A, Matrix< scalar_t > B, scalar_t beta, Matrix< scalar_t > C, Options const &opts)
 
template<Target target, typename scalar_t >
void hemmC (Side side, scalar_t alpha, HermitianMatrix< scalar_t > A, Matrix< scalar_t > B, scalar_t beta, Matrix< scalar_t > C, Options const &opts)
 
template<Target target, typename scalar_t >
void her2k (scalar_t alpha, Matrix< scalar_t > A, Matrix< scalar_t > B, blas::real_type< scalar_t > beta, HermitianMatrix< scalar_t > C, Options const &opts)
 
template<Target target, typename scalar_t >
void herk (blas::real_type< scalar_t > alpha, Matrix< scalar_t > A, blas::real_type< scalar_t > beta, HermitianMatrix< scalar_t > C, Options const &opts)
 
template<Target target, typename scalar_t >
int64_t hetrf (HermitianMatrix< scalar_t > &A, Pivots &pivots, BandMatrix< scalar_t > &T, Pivots &pivots2, Matrix< scalar_t > &H, Options const &opts)
 Distributed parallel Hermitian indefinite \(LTL^T\) factorization.
 
template<Target target, typename matrix_type >
blas::real_type< typename matrix_type::value_type > norm (Norm in_norm, matrix_type A, Options const &opts)
 
template<Target target, typename scalar_t >
int64_t pbtrf (HermitianBandMatrix< scalar_t > A, Options const &opts)
 Distributed parallel band Cholesky factorization.
 
template<Target target, typename scalar_t >
int64_t potrf (slate::internal::TargetType< target >, HermitianMatrix< scalar_t > A, Options const &opts)
 Distributed parallel Cholesky factorization.
 
template<Target target, typename scalar_t >
void scale (blas::real_type< scalar_t > numer, blas::real_type< scalar_t > denom, Matrix< scalar_t > &A, Options const &opts)
 
template<Target target, typename scalar_t >
void scale (blas::real_type< scalar_t > numer, blas::real_type< scalar_t > denom, BaseTrapezoidMatrix< scalar_t > &A, Options const &opts)
 
template<Target target, typename scalar_t , typename scalar_t2 >
void scale_row_col (Equed equed, std::vector< scalar_t2 > const &R, std::vector< scalar_t2 > const &C, Matrix< scalar_t > &A, Options const &opts)
 
template<Target target, typename scalar_t >
void set (scalar_t offdiag_value, scalar_t diag_value, Matrix< scalar_t > &A, Options const &opts)
 
template<Target target, typename scalar_t >
void set (scalar_t offdiag_value, scalar_t diag_value, BaseTrapezoidMatrix< scalar_t > &A, Options const &opts)
 
template<Target target, typename scalar_t >
void symm (Side side, scalar_t alpha, SymmetricMatrix< scalar_t > A, Matrix< scalar_t > B, scalar_t beta, Matrix< scalar_t > C, Options const &opts)
 
template<Target target, typename scalar_t >
void syr2k (scalar_t alpha, Matrix< scalar_t > A, Matrix< scalar_t > B, scalar_t beta, SymmetricMatrix< scalar_t > C, Options const &opts)
 
template<Target target, typename scalar_t >
void syrk (scalar_t alpha, Matrix< scalar_t > A, scalar_t beta, SymmetricMatrix< scalar_t > C, Options const &opts)
 
template<typename scalar_t >
void tb2bd_step (TriangularBandMatrix< scalar_t > &A, Matrix< scalar_t > &U, Matrix< scalar_t > &V, int64_t band, int64_t sweep, int64_t step)
 
template<typename scalar_t >
void tb2bd_run (TriangularBandMatrix< scalar_t > &A, Matrix< scalar_t > &U, Matrix< scalar_t > &V, int64_t band, int64_t diag_len, int64_t pass_size, int thread_rank, int thread_size, Progress &progress)
 
template<Target target, typename scalar_t >
void tb2bd (TriangularBandMatrix< scalar_t > &A, Matrix< scalar_t > &U, Matrix< scalar_t > &V, Options const &opts)
 
template<Target target, typename scalar_t >
void tbsm (Side side, scalar_t alpha, TriangularBandMatrix< scalar_t > A, Pivots &pivots, Matrix< scalar_t > B, Options const &opts)
 
template<Target target, typename scalar_t >
void trmm (Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, Options const &opts)
 
template<Target target, typename scalar_t >
void trsmA (Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, Options const &opts)
 
template<Target target, typename scalar_t >
void trsmB (Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, Options const &opts)
 
template<Target target, typename scalar_t >
void trtri (TriangularMatrix< scalar_t > A, Options const &opts)
 Distributed parallel inverse of a triangular matrix.
 
template<Target target, typename scalar_t >
void trtrm (TriangularMatrix< scalar_t > A, Options const &opts)
 todo: update docs: multiply not inverse.
 
template<Target target, typename scalar_t >
void unmlq (Side side, Op op, Matrix< scalar_t > &A, TriangularFactors< scalar_t > &T, Matrix< scalar_t > &C, Options const &opts)
 Distributed parallel multiply by Q from LQ factorization.
 
template<Target target, typename scalar_t >
void unmqr (Side side, Op op, Matrix< scalar_t > &A, TriangularFactors< scalar_t > &T, Matrix< scalar_t > &C, Options const &opts)
 Distributed parallel multiply by Q from QR factorization.
 
template<Target target, typename scalar_t >
void unmtr_hb2st (Side side, Op op, Matrix< scalar_t > &V, Matrix< scalar_t > &C, const std::map< Option, Value > &opts)
 Distributed parallel multiply by Q from the Hermitian band-to-tridiagonal reduction (hb2st).
 

Detailed Description

Namespace used for target implementations.

This differentiates, for example, the Target-templated implementation routines in slate::impl from the user-facing driver routines in the top-level slate namespace.

Function Documentation

◆ add() [1/2]

template<Target target, typename scalar_t >
void slate::impl::add ( scalar_t  alpha,
BaseTrapezoidMatrix< scalar_t > &  A,
scalar_t  beta,
BaseTrapezoidMatrix< scalar_t > &  B,
Options const &  opts 
)

Distributed parallel matrix-matrix addition. Generic implementation for any target.

◆ add() [2/2]

template<Target target, typename scalar_t >
void slate::impl::add ( scalar_t  alpha,
Matrix< scalar_t > &  A,
scalar_t  beta,
Matrix< scalar_t > &  B,
Options const &  opts 
)

Distributed parallel general matrix-matrix addition. Generic implementation for any target.

◆ colNorms()

template<Target target, typename matrix_type >
void slate::impl::colNorms ( Norm  in_norm,
matrix_type  A,
blas::real_type< typename matrix_type::value_type > *  values,
Options const &  opts 
)

Distributed parallel computation of matrix column norms. Generic implementation for any target.

◆ copy()

template<Target target, typename src_matrix_type , typename dst_matrix_type >
void slate::impl::copy ( src_matrix_type  A,
dst_matrix_type  B,
Options const &  opts 
)

Copy and precision conversion. Generic implementation for any target.

◆ gbmm()

template<Target target, typename scalar_t >
void slate::impl::gbmm ( scalar_t  alpha,
BandMatrix< scalar_t > &  A,
Matrix< scalar_t > &  B,
scalar_t  beta,
Matrix< scalar_t > &  C,
Options const &  opts 
)

Distributed parallel general band matrix-matrix multiplication. Generic implementation for any target. Dependencies enforce the following behavior:

  • bcast communications are serialized,
  • gemm operations are serialized,
  • bcasts can get ahead of gemms by the value of lookahead.

ColMajor layout is assumed.

◆ gbtrf()

template<Target target, typename scalar_t >
int64_t slate::impl::gbtrf ( BandMatrix< scalar_t > &  A,
Pivots &  pivots,
Options const &  opts 
)

Distributed parallel band LU factorization.

Generic implementation for any target. Panel and lookahead computed on host using Host OpenMP task.

Warning: ColMajor layout is assumed.

◆ ge2tb()

template<Target target, typename scalar_t >
void slate::impl::ge2tb ( Matrix< scalar_t > &  A,
TriangularFactors< scalar_t > &  TU,
TriangularFactors< scalar_t > &  TV,
Options const &  opts 
)

Distributed parallel reduction to band for 3-stage SVD.

Generic implementation for any target. Panel computed on host using Host OpenMP task.

ColMajor layout is assumed.

◆ gelqf()

template<Target target, typename scalar_t >
void slate::impl::gelqf ( Matrix< scalar_t > &  A,
TriangularFactors< scalar_t > &  T,
Options const &  opts 
)

Distributed parallel LQ factorization.

Generic implementation for any target. Panel and lookahead computed on host using Host OpenMP task.

ColMajor layout is assumed.

◆ gemmC()

template<Target target, typename scalar_t >
void slate::impl::gemmC ( scalar_t  alpha,
Matrix< scalar_t > &  A,
Matrix< scalar_t > &  B,
scalar_t  beta,
Matrix< scalar_t > &  C,
Options const &  opts 
)

Distributed parallel general matrix-matrix multiplication. Generic implementation for any target. Dependencies enforce the following behavior:

  • bcast communications are serialized,
  • gemm operations are serialized,
  • bcasts can get ahead of gemms by the value of lookahead (see the sketch below).

ColMajor layout is assumed.
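
A minimal usage sketch of the lookahead option, assuming SLATE's usual distributed matrix constructors, the Options map, and the Option::Lookahead / Target::HostTask enumerators (none of which are defined on this page):

    #include <slate/slate.hh>

    // Sketch only: build distributed matrices, then call the target-templated
    // implementation with a lookahead of 2, letting broadcasts run up to two
    // steps ahead of the serialized gemm operations.
    void gemm_sketch( int64_t m, int64_t n, int64_t k, int64_t nb,
                      int p, int q, MPI_Comm comm )
    {
        slate::Matrix<double> A( m, k, nb, p, q, comm );
        slate::Matrix<double> B( k, n, nb, p, q, comm );
        slate::Matrix<double> C( m, n, nb, p, q, comm );
        A.insertLocalTiles();
        B.insertLocalTiles();
        C.insertLocalTiles();

        slate::Options opts = { { slate::Option::Lookahead, 2 } };
        slate::impl::gemmC<slate::Target::HostTask, double>( 1.0, A, B, 0.0, C, opts );
    }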

◆ geqrf()

template<Target target, typename scalar_t >
void slate::impl::geqrf ( Matrix< scalar_t > &  A,
TriangularFactors< scalar_t > &  T,
Options const &  opts 
)

Distributed parallel QR factorization.

Generic implementation for any target. Panel and lookahead computed on host using Host OpenMP task.

ColMajor layout is assumed.
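
As a hedged illustration of how the QR routines fit together (typical usage, not code from this page): factor A in place with geqrf, then apply Q^H to a block of right-hand sides with unmqr. Side, Op, and TriangularFactors are assumed from SLATE's public headers.

    #include <slate/slate.hh>

    // Sketch only: A is overwritten with its QR factorization; T holds the
    // triangular factors of the block reflectors and is reused by unmqr.
    void qr_apply_sketch( slate::Matrix<double>& A, slate::Matrix<double>& C,
                          slate::Options const& opts )
    {
        slate::TriangularFactors<double> T;
        slate::impl::geqrf<slate::Target::HostTask, double>( A, T, opts );
        slate::impl::unmqr<slate::Target::HostTask, double>(
            slate::Side::Left, slate::Op::ConjTrans, A, T, C, opts );
    }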

◆ hb2st_run()

template<typename scalar_t >
void slate::impl::hb2st_run ( HermitianBandMatrix< scalar_t > &  A,
Matrix< scalar_t > &  V,
int  thread_rank,
int  thread_size,
ProgressVector &  progress 
)

Implements multi-threaded tridiagonal bulge chasing. This is the main routine that each thread runs.

Parameters
  [in,out]  A            The band Hermitian matrix A.
  [out]     V            Matrix of Householder reflectors produced in the process. Dimension 2*band-by-XYZ todo
  [in]      thread_rank  rank of this thread
  [in]      thread_size  number of threads
  [in]      progress     progress table for synchronizing threads

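A minimal sketch of how a caller might drive hb2st_run across OpenMP threads with a shared progress table. The progress-vector length and the OpenMP setup here are assumptions, not SLATE's actual hb2st driver.

    #include <omp.h>
    #include <slate/slate.hh>

    // Sketch only: each thread runs hb2st_run with its rank and thread count;
    // the shared ProgressVector of atomics lets a thread wait until the
    // prerequisite step of an earlier sweep has completed.
    template <typename scalar_t>
    void hb2st_driver_sketch( slate::HermitianBandMatrix<scalar_t>& A,
                              slate::Matrix<scalar_t>& V, int64_t num_sweeps )
    {
        slate::impl::ProgressVector progress( num_sweeps );  // one counter per sweep (assumed)
        #pragma omp parallel
        {
            int thread_rank = omp_get_thread_num();
            int thread_size = omp_get_num_threads();
            slate::impl::hb2st_run( A, V, thread_rank, thread_size, progress );
        }
    }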

◆ hb2st_step()

template<typename scalar_t >
void slate::impl::hb2st_step ( HermitianBandMatrix< scalar_t > &  A,
Matrix< scalar_t > &  V,
int64_t  sweep,
int64_t  step 
)

Implements the tasks of tridiagonal bulge chasing.

Parameters
  [in,out]  A      The band Hermitian matrix A.
  [out]     V      Matrix of Householder reflectors produced in the process. Dimension 2*band-by-... todo.
  [in]      sweep  The sweep number. One sweep eliminates one row and sweeps the entire matrix.
  [in]      step   The step number. Steps in each sweep have consecutive numbers.

◆ hbmm()

template<Target target, typename scalar_t >
void slate::impl::hbmm ( Side  side,
scalar_t  alpha,
HermitianBandMatrix< scalar_t >  A,
Matrix< scalar_t >  B,
scalar_t  beta,
Matrix< scalar_t >  C,
Options const &  opts 
)

Distributed parallel Hermitian banded matrix-matrix multiplication. Generic implementation for any target. Dependencies enforce the following behavior:

  • bcast communications are serialized,
  • hbmm operations are serialized,
  • bcasts can get ahead of hbmms by the value of lookahead.

Note A, B, and C are passed by value, so we can transpose if needed (for side = right) without affecting caller.

ColMajor layout is assumed.

◆ hegst()

template<Target target, typename scalar_t >
void slate::impl::hegst ( int64_t  itype,
HermitianMatrix< scalar_t >  A,
HermitianMatrix< scalar_t >  B,
Options const &  opts 
)

Distributed parallel reduction of a complex Hermitian positive-definite generalized eigenvalue problem to the standard form.

Generic implementation for any target.

◆ her2k()

template<Target target, typename scalar_t >
void slate::impl::her2k ( scalar_t  alpha,
Matrix< scalar_t >  A,
Matrix< scalar_t >  B,
blas::real_type< scalar_t >  beta,
HermitianMatrix< scalar_t >  C,
Options const &  opts 
)

Distributed parallel Hermitian rank 2k update. Generic implementation for any target. Dependencies enforce the following behavior:

  • bcast communications are serialized,
  • her2k operations are serialized,
  • bcasts can get ahead of her2ks by the value of lookahead.

Note A, B, and C are passed by value, so we can transpose if needed (for uplo = Upper) without affecting caller.

◆ hetrf()

template<Target target, typename scalar_t >
int64_t slate::impl::hetrf ( HermitianMatrix< scalar_t > &  A,
Pivots &  pivots,
BandMatrix< scalar_t > &  T,
Pivots &  pivots2,
Matrix< scalar_t > &  H,
Options const &  opts 
)

Distributed parallel Hermitian indefinite \(LTL^T\) factorization.

Generic implementation for any target. GPU version not yet implemented.

◆ norm()

template<Target target, typename matrix_type >
blas::real_type< typename matrix_type::value_type > slate::impl::norm ( Norm  in_norm,
matrix_type  A,
Options const &  opts 
)

Distributed parallel general matrix norm. Generic implementation for any target.
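
A hedged call sketch: matrix_type is deduced from the argument, and the result type is blas::real_type of the matrix's value_type (double here). Norm::One is assumed from BLAS++.

    #include <slate/slate.hh>

    // Sketch only: compute the one-norm of a general distributed matrix.
    double one_norm_sketch( slate::Matrix<double>& A, slate::Options const& opts )
    {
        return slate::impl::norm<slate::Target::HostTask>( slate::Norm::One, A, opts );
    }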

◆ pbtrf()

template<Target target, typename scalar_t >
int64_t slate::impl::pbtrf ( HermitianBandMatrix< scalar_t >  A,
Options const &  opts 
)

Distributed parallel band Cholesky factorization.

Generic implementation for any target. Panel and lookahead computed on host using Host OpenMP task.

Warning: ColMajor layout is assumed.

◆ potrf()

template<Target target, typename scalar_t >
int64_t slate::impl::potrf ( slate::internal::TargetType< target >  ,
HermitianMatrix< scalar_t >  A,
Options const &  opts 
)

Distributed parallel Cholesky factorization.

Generic implementation for any target.
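
The leading unnamed parameter is a compile-time tag. As a hedged illustration of the dispatch pattern (not code from SLATE), a wrapper could construct the tag and forward it:

    #include <slate/slate.hh>

    // Sketch only: slate::internal::TargetType<target> selects the implementation
    // for a given Target at compile time; the wrapper builds the tag and forwards it.
    template <slate::Target target, typename scalar_t>
    int64_t potrf_dispatch_sketch( slate::HermitianMatrix<scalar_t>& A,
                                   slate::Options const& opts )
    {
        return slate::impl::potrf<target, scalar_t>(
            slate::internal::TargetType<target>(), A, opts );
    }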

◆ scale() [1/2]

template<Target target, typename scalar_t >
void slate::impl::scale ( blas::real_type< scalar_t >  numer,
blas::real_type< scalar_t >  denom,
BaseTrapezoidMatrix< scalar_t > &  A,
Options const &  opts 
)

Scale matrix entries by the real scalar numer/denom. Generic implementation for any target.

◆ scale() [2/2]

template<Target target, typename scalar_t >
void slate::impl::scale ( blas::real_type< scalar_t >  numer,
blas::real_type< scalar_t >  denom,
Matrix< scalar_t > &  A,
Options const &  opts 
)

Scale matrix entries by the real scalar numer/denom. Generic implementation for any target.

◆ scale_row_col()

template<Target target, typename scalar_t , typename scalar_t2 >
void slate::impl::scale_row_col ( Equed  equed,
std::vector< scalar_t2 > const &  R,
std::vector< scalar_t2 > const &  C,
Matrix< scalar_t > &  A,
Options const &  opts 
)

Apply row or column scaling, or both, to a Matrix. Generic implementation for any target.

◆ set() [1/2]

template<Target target, typename scalar_t >
void slate::impl::set ( scalar_t  offdiag_value,
scalar_t  diag_value,
BaseTrapezoidMatrix< scalar_t > &  A,
Options const &  opts 
)

Set matrix entries. Generic implementation for any target.

◆ set() [2/2]

template<Target target, typename scalar_t >
void slate::impl::set ( scalar_t  offdiag_value,
scalar_t  diag_value,
Matrix< scalar_t > &  A,
Options const &  opts 
)

Set matrix entries. Generic implementation for any target.

◆ syr2k()

template<Target target, typename scalar_t >
void slate::impl::syr2k ( scalar_t  alpha,
Matrix< scalar_t >  A,
Matrix< scalar_t >  B,
scalar_t  beta,
SymmetricMatrix< scalar_t >  C,
Options const &  opts 
)

Distributed parallel symmetric rank 2k update. Generic implementation for any target. Dependencies enforce the following behavior:

  • bcast communications are serialized,
  • syr2k operations are serialized,
  • bcasts can get ahead of syr2ks by the value of lookahead.

Note A, B, and C are passed by value, so we can transpose if needed (for uplo = Upper) without affecting caller.

◆ syrk()

template<Target target, typename scalar_t >
void slate::impl::syrk ( scalar_t  alpha,
Matrix< scalar_t >  A,
scalar_t  beta,
SymmetricMatrix< scalar_t >  C,
Options const &  opts 
)

Distributed parallel symmetric rank k update. Generic implementation for any target. Dependencies enforce the following behavior:

  • bcast communications are serialized,
  • syrk operations are serialized,
  • bcasts can get ahead of syrks by the value of lookahead.

Note A and C are passed by value, so we can transpose if needed (for uplo = Upper) without affecting caller.

◆ tb2bd()

template<Target target, typename scalar_t >
void slate::impl::tb2bd ( TriangularBandMatrix< scalar_t > &  A,
Matrix< scalar_t > &  U,
Matrix< scalar_t > &  V,
Options const &  opts 
)

Reduces a band matrix to a bidiagonal matrix using bulge chasing.

◆ tb2bd_run()

template<typename scalar_t >
void slate::impl::tb2bd_run ( TriangularBandMatrix< scalar_t > &  A,
Matrix< scalar_t > &  U,
Matrix< scalar_t > &  V,
int64_t  band,
int64_t  diag_len,
int64_t  pass_size,
int  thread_rank,
int  thread_size,
Progress &  progress 
)

Implements multi-threaded bidiagonal bulge chasing.

Parameters
  [in,out]  A            The band matrix A.
  [in]      band         The bandwidth of matrix A.
  [in]      diag_len     The length of the diagonal.
  [in]      pass_size    The number of rows eliminated at a time.
  [in]      thread_rank  rank of this thread
  [in]      thread_size  number of threads
  [in]      progress     progress table for synchronizing threads

◆ tb2bd_step()

template<typename scalar_t >
void slate::impl::tb2bd_step ( TriangularBandMatrix< scalar_t > &  A,
Matrix< scalar_t > &  U,
Matrix< scalar_t > &  V,
int64_t  band,
int64_t  sweep,
int64_t  step 
)

Implements the tasks of bidiagonal bulge chasing.

Parameters
  [in,out]  A      The band matrix A.
  [out]     U      Matrix to store the Householder vectors applied to the left of the band matrix A. U is 2*nb by (nt*(nt + 1)/2)*nb, where nb is the tile size (A.tileNb(0)) and nt is the number of tiles of A (A.nt()); see the dimension sketch below. The U matrix must be allocated on MPI rank 0, where the band matrix A resides.
  [out]     V      Matrix to store the Householder vectors applied to the right of the band matrix A. V has the same dimensions as U and must likewise be allocated on MPI rank 0.
  [in]      band   The bandwidth of matrix A.
  [in]      sweep  The sweep number. One sweep eliminates one row and sweeps the entire matrix.
  [in]      step   The step number. Steps in each sweep have consecutive numbers.
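
A hedged reading of the dimension formula above, spelled out as code; the parenthesization (nt*(nt+1)/2)*nb is an assumption about the intended precedence.

    #include <slate/slate.hh>

    // Sketch only: dimensions of U and V as described in the parameter list.
    template <typename scalar_t>
    void uv_dims_sketch( slate::TriangularBandMatrix<scalar_t>& A,
                         int64_t& rows, int64_t& cols )
    {
        int64_t nb = A.tileNb( 0 );        // tile size
        int64_t nt = A.nt();               // number of tiles of A
        rows = 2 * nb;
        cols = nt * (nt + 1) / 2 * nb;     // read as (nt*(nt+1)/2) * nb
    }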

◆ tbsm()

template<Target target, typename scalar_t >
void slate::impl::tbsm ( Side  side,
scalar_t  alpha,
TriangularBandMatrix< scalar_t >  A,
Pivots &  pivots,
Matrix< scalar_t >  B,
Options const &  opts 
)

Distributed parallel triangular matrix solve. Generic implementation for any target. Note A and B are passed by value, so we can transpose if needed (for side = right) without affecting caller.

◆ trmm()

template<Target target, typename scalar_t >
void slate::impl::trmm ( Side  side,
scalar_t  alpha,
TriangularMatrix< scalar_t > &  A,
Matrix< scalar_t > &  B,
Options const &  opts 
)

Distributed parallel triangular matrix-matrix multiplication. Generic implementation for any target.

◆ trsmB()

template<Target target, typename scalar_t >
void slate::impl::trsmB ( Side  side,
scalar_t  alpha,
TriangularMatrix< scalar_t > &  A,
Matrix< scalar_t > &  B,
Options const &  opts 
)

Distributed parallel triangular matrix solve. Generic implementation for any target.

◆ trtri()

template<Target target, typename scalar_t >
void slate::impl::trtri ( TriangularMatrix< scalar_t >  A,
Options const &  opts 
)

Distributed parallel inverse of a triangular matrix.

Generic implementation for any target. Panel and lookahead computed on host using Host OpenMP task.

◆ trtrm()

template<Target target, typename scalar_t >
void slate::impl::trtrm ( TriangularMatrix< scalar_t >  A,
Options const &  opts 
)

todo: update docs: multiply not inverse.

Distributed parallel triangular-triangular matrix multiply (as the todo notes, the earlier "inverse" wording is outdated). Generic implementation for any target.

◆ unmlq()

template<Target target, typename scalar_t >
void slate::impl::unmlq ( Side  side,
Op  op,
Matrix< scalar_t > &  A,
TriangularFactors< scalar_t > &  T,
Matrix< scalar_t > &  C,
Options const &  opts 
)

Distributed parallel multiply by Q from LQ factorization.

Generic implementation for any target.

◆ unmqr()

template<Target target, typename scalar_t >
void slate::impl::unmqr ( Side  side,
Op  op,
Matrix< scalar_t > &  A,
TriangularFactors< scalar_t > &  T,
Matrix< scalar_t > &  C,
Options const &  opts 
)

Distributed parallel multiply by Q from QR factorization.

Generic implementation for any target.