Functions
template<Target target = Target::HostTask, typename scalar_t >
void	slate::internal::trsm (Side side, scalar_t alpha, TriangularMatrix< scalar_t > &&A, Matrix< scalar_t > &&B, int priority, Layout layout, int64_t queue_index)
	Triangular solve matrix (multiple right-hand sides).

template<typename scalar_t >
void	slate::internal::trsm (internal::TargetType< Target::HostTask >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index)
	Triangular solve matrix (multiple right-hand sides).

template<typename scalar_t >
void	slate::internal::trsm (internal::TargetType< Target::HostNest >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index)
	Triangular solve matrix (multiple right-hand sides).

template<typename scalar_t >
void	slate::internal::trsm (internal::TargetType< Target::HostBatch >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index)
	Triangular solve matrix (multiple right-hand sides).

template<typename scalar_t >
void	slate::internal::trsm (internal::TargetType< Target::Devices >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index)
	Triangular solve matrix (multiple right-hand sides).

template<Target target = Target::HostTask, typename scalar_t >
void	slate::internal::trsmA (Side side, scalar_t alpha, TriangularMatrix< scalar_t > &&A, Matrix< scalar_t > &&B, int priority, Layout layout, int64_t queue_index)
	Triangular solve matrix (multiple right-hand sides).

template<typename scalar_t >
void	slate::internal::trsmA (internal::TargetType< Target::HostTask >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index)
	Triangular solve matrix (multiple right-hand sides).

template<typename scalar_t >
void	slate::internal::trsmA (internal::TargetType< Target::HostNest >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index)
	Triangular solve matrix (multiple right-hand sides).

template<typename scalar_t >
void	slate::internal::trsmA (internal::TargetType< Target::HostBatch >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index)
	Triangular solve matrix (multiple right-hand sides).

template<typename scalar_t >
void	slate::internal::trsmA (internal::TargetType< Target::Devices >, Side side, scalar_t alpha, TriangularMatrix< scalar_t > &A, Matrix< scalar_t > &B, int priority, Layout layout, int64_t queue_index)
	Triangular solve matrix (multiple right-hand sides).

template<Target target = Target::HostTask, typename scalar_t >
void	slate::work::trsm (Side side, scalar_t alpha, TriangularMatrix< scalar_t > A, Matrix< scalar_t > B, uint8_t *row, Options const &opts)
	Triangular solve matrix (multiple right-hand sides).

template<Target target = Target::HostTask, typename scalar_t >
void	slate::work::trsmA (Side side, scalar_t alpha, TriangularMatrix< scalar_t > A, Matrix< scalar_t > B, uint8_t *row, Options const &opts)
	Triangular solve matrix (multiple right-hand sides).

Detailed Description

Function Documentation

◆ trsm() [1/6]

template<typename scalar_t >

void slate::internal::trsm	(	internal::TargetType< Target::Devices >	,
		Side	side,
		scalar_t	alpha,
		TriangularMatrix< scalar_t > &	A,
		Matrix< scalar_t > &	B,
		int	priority,
		Layout	layout,
		int64_t	queue_index
	)

Triangular solve matrix (multiple right-hand sides).

GPU device batched cuBLAS implementation.

◆ trsm() [2/6]

template<typename scalar_t >

void slate::internal::trsm	(	internal::TargetType< Target::HostBatch >	,
		Side	side,
		scalar_t	alpha,
		TriangularMatrix< scalar_t > &	A,
		Matrix< scalar_t > &	B,
		int	priority,
		Layout	layout,
		int64_t	queue_index
	)

Triangular solve matrix (multiple right-hand sides).

Host batched implementation.

◆ trsm() [3/6]

template<typename scalar_t >

void slate::internal::trsm	(	internal::TargetType< Target::HostNest >	,
		Side	side,
		scalar_t	alpha,
		TriangularMatrix< scalar_t > &	A,
		Matrix< scalar_t > &	B,
		int	priority,
		Layout	layout,
		int64_t	queue_index
	)

Triangular solve matrix (multiple right-hand sides).

Host nested OpenMP implementation.

◆ trsm() [4/6]

template<typename scalar_t >

void slate::internal::trsm	(	internal::TargetType< Target::HostTask >	,
		Side	side,
		scalar_t	alpha,
		TriangularMatrix< scalar_t > &	A,
		Matrix< scalar_t > &	B,
		int	priority,
		Layout	layout,
		int64_t	queue_index
	)

Triangular solve matrix (multiple right-hand sides).

Host OpenMP task implementation.

◆ trsm() [5/6]

template<Target target = Target::HostTask, typename scalar_t >

void slate::internal::trsm	(	Side	side,
		scalar_t	alpha,
		TriangularMatrix< scalar_t > &&	A,
		Matrix< scalar_t > &&	B,
		int	priority,
		Layout	layout,
		int64_t	queue_index
	)

Triangular solve matrix (multiple right-hand sides).

Dispatches to target implementations.

◆ trsm() [6/6]

template<Target target = Target::HostTask, typename scalar_t >

void slate::work::trsm	(	Side	side,
		scalar_t	alpha,
		TriangularMatrix< scalar_t >	A,
		Matrix< scalar_t >	B,
		uint8_t *	row,
		Options const &	opts
	)

Triangular solve matrix (multiple right-hand sides).

Note A and B are passed by value, so we can transpose if needed (for side = right) without affecting caller.

Template Parameters

target	One of HostTask, HostNest, HostBatch, Devices.
scalar_t	One of float, double, std::complex<float>, std::complex<double>.

Parameters

[in]	side	Whether A appears on the left or on the right of X: Side::Left: solve \(A X = \alpha B\) Side::Right: solve \(X A = \alpha B\)
[in]	alpha	The scalar alpha.
[in]	A	If side = left, the m-by-m triangular matrix A; if side = right, the n-by-n triangular matrix A.
[in,out]	B	On entry, the m-by-n matrix B. On exit, overwritten by the result X.
[in]	row	A raw pointer to a dummy vector data. The dummy vector is used for OpenMP dependencies tracking, not based on the actual data. Entries in the dummy vector represent each row of matrix \(B\). The size of row should be number of block columns of matrix \(A\).
[in]	lookahead	Number of blocks to overlap communication and computation. lookahead >= 0. Default 1.

◆ trsmA() [1/6]

template<typename scalar_t >

void slate::internal::trsmA	(	internal::TargetType< Target::Devices >	,
		Side	side,
		scalar_t	alpha,
		TriangularMatrix< scalar_t > &	A,
		Matrix< scalar_t > &	B,
		int	priority,
		Layout	layout,
		int64_t	queue_index
	)

Triangular solve matrix (multiple right-hand sides).

GPU device batched cuBLAS implementation.

◆ trsmA() [2/6]

template<typename scalar_t >

void slate::internal::trsmA	(	internal::TargetType< Target::HostBatch >	,
		Side	side,
		scalar_t	alpha,
		TriangularMatrix< scalar_t > &	A,
		Matrix< scalar_t > &	B,
		int	priority,
		Layout	layout,
		int64_t	queue_index
	)

Triangular solve matrix (multiple right-hand sides).

Host batched implementation.

◆ trsmA() [3/6]

template<typename scalar_t >

void slate::internal::trsmA	(	internal::TargetType< Target::HostNest >	,
		Side	side,
		scalar_t	alpha,
		TriangularMatrix< scalar_t > &	A,
		Matrix< scalar_t > &	B,
		int	priority,
		Layout	layout,
		int64_t	queue_index
	)

Triangular solve matrix (multiple right-hand sides).

Host nested OpenMP implementation.

◆ trsmA() [4/6]

template<typename scalar_t >

void slate::internal::trsmA	(	internal::TargetType< Target::HostTask >	,
		Side	side,
		scalar_t	alpha,
		TriangularMatrix< scalar_t > &	A,
		Matrix< scalar_t > &	B,
		int	priority,
		Layout	layout,
		int64_t	queue_index
	)

Triangular solve matrix (multiple right-hand sides).

Host OpenMP task implementation.

◆ trsmA() [5/6]

template<Target target = Target::HostTask, typename scalar_t >

void slate::internal::trsmA	(	Side	side,
		scalar_t	alpha,
		TriangularMatrix< scalar_t > &&	A,
		Matrix< scalar_t > &&	B,
		int	priority,
		Layout	layout,
		int64_t	queue_index
	)

Triangular solve matrix (multiple right-hand sides).

We assume A is a single tile and that the calling rank owns it as well as it has [a copy of] all the tiles in B Dispatches to target implementations.

◆ trsmA() [6/6]

template<Target target = Target::HostTask, typename scalar_t >

void slate::work::trsmA	(	Side	side,
		scalar_t	alpha,
		TriangularMatrix< scalar_t >	A,
		Matrix< scalar_t >	B,
		uint8_t *	row,
		Options const &	opts
	)

Triangular solve matrix (multiple right-hand sides).

Note A and B are passed by value, so we can transpose if needed (for side = right) without affecting caller.

Template Parameters

target	One of HostTask, HostNest, HostBatch, Devices.
scalar_t	One of float, double, std::complex<float>, std::complex<double>.

Parameters

[in]	side	Whether A appears on the left or on the right of X: Side::Left: solve \(A X = \alpha B\) Side::Right: solve \(X A = \alpha B\)
[in]	alpha	The scalar alpha.
[in]	A	If side = left, the m-by-m triangular matrix A; if side = right, the n-by-n triangular matrix A.
[in,out]	B	On entry, the m-by-n matrix B. On exit, overwritten by the result X.
[in]	row	A raw pointer to a dummy vector data. The dummy vector is used for OpenMP dependencies tracking, not based on the actual data. Entries in the dummy vector represent each row of matrix \(B\). The size of row should be number of block columns of matrix \(A\).
[in]	lookahead	Number of blocks to overlap communication and computation. lookahead >= 0. Default 1.

Functions

Detailed Description

Function Documentation

◆ trsm() [1/6]

◆ trsm() [2/6]

◆ trsm() [3/6]

◆ trsm() [4/6]

◆ trsm() [5/6]

◆ trsm() [6/6]

◆ trsmA() [1/6]

◆ trsmA() [2/6]

◆ trsmA() [3/6]

◆ trsmA() [4/6]

◆ trsmA() [5/6]

◆ trsmA() [6/6]