Functions
template<typename scalar_t >
void	slate::internal::gerbt (Matrix< scalar_t > A11, Matrix< scalar_t > A12, Matrix< scalar_t > A21, Matrix< scalar_t > A22, Matrix< scalar_t > U1, Matrix< scalar_t > U2, Matrix< scalar_t > V1, Matrix< scalar_t > V2)
	Applies a single butterfly matrix to each side of A.

template<typename scalar_t >
void	slate::internal::gerbt (Side side, Op trans, Matrix< scalar_t > B1, Matrix< scalar_t > B2, Matrix< scalar_t > U1, Matrix< scalar_t > U2)
	Applies a single butterfly matrix to one side of B.

template<typename scalar_t >
void	slate::internal::getrf_panel (internal::TargetType< Target::HostTask >, Matrix< scalar_t > &A, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, blas::real_type< scalar_t > pivot_threshold, int max_panel_threads, int priority, int tag, int64_t *info)
	LU factorization of a column of tiles, host implementation.

template<Target target = Target::HostTask, typename scalar_t >
void	slate::internal::getrf_panel (Matrix< scalar_t > &&A, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, blas::real_type< scalar_t > pivot_threshold, int max_panel_threads, int priority, int tag, int64_t *info)
	LU factorization of a column of tiles.

template<Target target = Target::HostTask, typename scalar_t >
void	slate::internal::getrf_nopiv (Matrix< scalar_t > &&A, int64_t ib, int priority, int64_t *info)
	LU factorization of single tile without pivoting.

template<typename scalar_t >
void	slate::internal::getrf_nopiv (internal::TargetType< Target::HostTask >, Matrix< scalar_t > &A, int64_t ib, int priority, int64_t *info)
	LU factorization of single tile without pivoting, host implementation.

template<typename scalar_t >
void	slate::internal::getrf_tntpiv_local (internal::TargetType< Target::HostTask >, std::vector< Tile< scalar_t > > &tiles, std::vector< char * > dwork_array, size_t dwork_bytes, int mlocal, int device, lapack::Queue *queue, int64_t diag_len, int64_t ib, int stage, int64_t mb, int64_t nb, std::vector< int64_t > &tile_indices, std::vector< std::vector< AuxPivot< scalar_t > > > &aux_pivot, int mpi_rank, int max_panel_threads, int priority, int64_t *info)
	Multi-threaded LU factorization of local tiles.

template<Target target, typename scalar_t >
void	slate::internal::getrf_tntpiv_panel (internal::TargetType< target >, Matrix< scalar_t > &A, Matrix< scalar_t > &Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
	LU factorization of a column of tiles.

template<Target target = Target::HostTask, typename scalar_t >
void	slate::internal::getrf_tntpiv_panel (Matrix< scalar_t > &&A, Matrix< scalar_t > &&Awork, std::vector< char * > dwork_array, size_t work_bytes, int64_t diag_len, int64_t ib, std::vector< Pivot > &pivot, int max_panel_threads, int priority, int64_t *info)
	LU factorization of a column of tiles.

template<typename scalar_t >
void	slate::internal::rbt_fill (Matrix< scalar_t > &U, const int64_t seed)
	Allocates and fills a random butterfly transform in packed storage.

template<typename scalar_t >
std::pair< Matrix< scalar_t >, Matrix< scalar_t > >	slate::internal::rbt_generate (const Matrix< scalar_t > &A, const int64_t d, const int64_t seed)
	Constructs two random butterfly matrices in packed storage to transform the given matrix.

Detailed Description

Function Documentation

◆ gerbt() [1/2]

template<typename scalar_t >

void slate::internal::gerbt	(	Matrix< scalar_t >	A11,
		Matrix< scalar_t >	A12,
		Matrix< scalar_t >	A21,
		Matrix< scalar_t >	A22,
		Matrix< scalar_t >	U1,
		Matrix< scalar_t >	U2,
		Matrix< scalar_t >	V1,
		Matrix< scalar_t >	V2
	)

Applies a single butterfly matrix to each side of A.

The matrices are divided into submatrices along the halves of the butterfly matrices.

◆ gerbt() [2/2]

template<typename scalar_t >

void slate::internal::gerbt	(	Side	side,
		Op	trans,
		Matrix< scalar_t >	B1,
		Matrix< scalar_t >	B2,
		Matrix< scalar_t >	U1,
		Matrix< scalar_t >	U2
	)

Applies a single butterfly matrix to one side of B.

The matrices are divided into the submatrices along the half of the butterfly matrix.

◆ getrf_nopiv() [1/2]

template<typename scalar_t >

void slate::internal::getrf_nopiv	(	internal::TargetType< Target::HostTask >	,
		Matrix< scalar_t > &	A,
		int64_t	ib,
		int	priority,
		int64_t *	info
	)

LU factorization of single tile without pivoting, host implementation.

Parameters

[in,out]	info	Exit status.

0: successful exit
i > 0: U(i,i) is exactly zero (1-based index). The factorization will have NaN due to division by zero.

◆ getrf_nopiv() [2/2]

template<Target target = Target::HostTask, typename scalar_t >

void slate::internal::getrf_nopiv	(	Matrix< scalar_t > &&	A,
		int64_t	ib,
		int	priority,
		int64_t *	info
	)

LU factorization of single tile without pivoting.

Dispatches to target implementations.

◆ getrf_panel()

template<Target target = Target::HostTask, typename scalar_t >

void slate::internal::getrf_panel	(	Matrix< scalar_t > &&	A,
		int64_t	diag_len,
		int64_t	ib,
		std::vector< Pivot > &	pivot,
		blas::real_type< scalar_t >	pivot_threshold,
		int	max_panel_threads,
		int	priority,
		int	tag,
		int64_t *	info
	)

LU factorization of a column of tiles.

Dispatches to target implementations.

◆ getrf_tntpiv_local()

template<typename scalar_t >

void slate::internal::getrf_tntpiv_local	(	internal::TargetType< Target::HostTask >	,
		std::vector< Tile< scalar_t > > &	tiles,
		std::vector< char * >	dwork_array,
		size_t	dwork_bytes,
		int	mlocal,
		int	device,
		lapack::Queue *	queue,
		int64_t	diag_len,
		int64_t	ib,
		int	stage,
		int64_t	mb,
		int64_t	nb,
		std::vector< int64_t > &	tile_indices,
		std::vector< std::vector< AuxPivot< scalar_t > > > &	aux_pivot,
		int	mpi_rank,
		int	max_panel_threads,
		int	priority,
		int64_t *	info
	)

Multi-threaded LU factorization of local tiles.

Parameters

[in]	target	Target for dispatch to correct implementation.
[in,out]	tiles	List of tiles to factor on the CPU.
[in,out]	dwork_array	Array of GPU device workspaces, dimension (num_devices). dwork_array[ dev ] stores dA, dwork, dipiv, and dinfo on GPU dev; dA is contiguous copy of tiles on GPU, dwork is getrf workspace, dipiv is pivot vector, dinfo is getrf return value.
[in]	dwork_bytes	Total size of dwork_array[ dev ] in bytes for each GPU device.
[in]	mlocal	Number of rows in dwork_array.
[in]	device	Device performing factorization, needed for pointing to correct memory in dwork_array. Device == HostNum for CPU implementation.
[in]	queue	Queue associated to input device.
[in]	diag_len	Length of diagonal, min( mb, nb ) of diagonal tile.
[in]	ib	Inner blocking.
[in]	stage	Stage = 0 is initial local tiles, stage = 1 is subsequent tournament.
[in]	mb	Tile row block size.
[in]	nb	Tile column block size.
[in]	tile_indices	Block row indices of tiles in tiles array.
[in]	mpi_rank	MPI rank of this process.
[in]	max_panel_threads	Maximum number of threads to launch for local panel.
[in]	priority	OpenMP priority. todo: unused. Should it be on taskloop?

◆ getrf_tntpiv_panel()

template<Target target = Target::HostTask, typename scalar_t >

void slate::internal::getrf_tntpiv_panel	(	Matrix< scalar_t > &&	A,
		Matrix< scalar_t > &&	Awork,
		std::vector< char * >	dwork_array,
		size_t	work_bytes,
		int64_t	diag_len,
		int64_t	ib,
		std::vector< Pivot > &	pivot,
		int	max_panel_threads,
		int	priority,
		int64_t *	info
	)

LU factorization of a column of tiles.

Dispatches to target implementations.

◆ rbt_fill()

template<typename scalar_t >

void slate::internal::rbt_fill	(	Matrix< scalar_t > &	U,
		const int64_t	seed
	)

Allocates and fills a random butterfly transform in packed storage.

The depth is computed based on the number of columns in U.

◆ rbt_generate()

template<typename scalar_t >

std::pair< Matrix< scalar_t >, Matrix< scalar_t > > slate::internal::rbt_generate	(	const Matrix< scalar_t > &	A,
		const int64_t	d,
		const int64_t	seed
	)

Constructs two random butterfly matrices in packed storage to transform the given matrix.

Parameters

[in]	A	The matrix to be transformed
[in]	d	The depth of the transform
[in]	seed	A seed for controlling the random number generation

Returns: a pair containing the left and right transforms

Functions

Detailed Description

Function Documentation

◆ gerbt() [1/2]

◆ gerbt() [2/2]

◆ getrf_nopiv() [1/2]

◆ getrf_nopiv() [2/2]

◆ getrf_panel()

◆ getrf_tntpiv_local()

◆ getrf_tntpiv_panel()

◆ rbt_fill()

◆ rbt_generate()