Functions
template<typename scalar_t >
void	slate::tile::getrf (int64_t diag_len, int64_t ib, std::vector< Tile< scalar_t > > &tiles, std::vector< int64_t > &tile_indices, std::vector< AuxPivot< scalar_t > > &pivot, int mpi_rank, int mpi_root, MPI_Comm mpi_comm, int thread_rank, int thread_size, ThreadBarrier &thread_barrier, std::vector< scalar_t > &max_value, std::vector< int64_t > &max_index, std::vector< int64_t > &max_offset, std::vector< scalar_t > &top_block, blas::real_type< scalar_t > pivot_threshold, int64_t *info)
	Compute the LU factorization of a panel.

template<typename scalar_t >
void	slate::tile::getrf_nopiv (Tile< scalar_t > tile, int64_t ib, int64_t *info)
	Compute the LU factorization of a tile without pivoting.

template<typename scalar_t >
void	slate::tile::getrf_tntpiv_local (int64_t diag_len, int64_t ib, int stage, std::vector< Tile< scalar_t > > &tiles, std::vector< int64_t > &tile_indices, std::vector< std::vector< internal::AuxPivot< scalar_t > > > &aux_pivot, int mpi_rank, int thread_id, int thread_size, ThreadBarrier &thread_barrier, std::vector< scalar_t > &max_value, std::vector< int64_t > &max_index, std::vector< int64_t > &max_offset, std::vector< scalar_t > &top_block, int64_t *info)
	Compute the LU factorization of a local panel, for use in CALU tournament pivoting.

Detailed Description

Function Documentation

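template<typename scalar_t >

void slate::tile::getrf	(	int64_t	diag_len,
		int64_t	ib,
		std::vector< Tile< scalar_t > > &	tiles,
		std::vector< int64_t > &	tile_indices,
		std::vector< AuxPivot< scalar_t > > &	pivot,
		int	mpi_rank,
		int	mpi_root,
		MPI_Comm	mpi_comm,
		int	thread_rank,
		int	thread_size,
		ThreadBarrier &	thread_barrier,
		std::vector< scalar_t > &	max_value,
		std::vector< int64_t > &	max_index,
		std::vector< int64_t > &	max_offset,
		std::vector< scalar_t > &	top_block,
		blas::real_type< scalar_t >	pivot_threshold,
		int64_t *	info
	)
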
Compute the LU factorization of a panel.

Parameters

[in]	diag_len	length of the panel diagonal
[in]	ib	internal blocking in the panel
[in,out]	tiles	local tiles in the panel
[in]	tile_indices	i indices of the tiles in the panel
[in,out]	pivot	pivots produced by the panel factorization
[in]	mpi_rank	MPI rank in the panel factorization
[in]	mpi_root	MPI rank of the root for the panel factorization
[in]	mpi_comm	MPI subcommunicator for the panel factorization
[in]	thread_rank	rank of this thread
[in]	thread_size	number of local threads
[in]	thread_barrier	barrier for synchronizing local threads
[out]	max_value	workspace for per-thread pivot value
[out]	max_index	workspace for per-thread pivot index
[out]	max_offset	workspace for per-thread pivot offset (pivot offset in the tile)
[out]	top_block	workspace for broadcasting the top row for the geru operation and the top block for the gemm operation.
[in]	pivot_threshold	threshold for pivoting. 1 is partial pivoting, 0 is no pivoting
[in,out]	info	Exit status, updated only by the thread with the diagonal tile. 0: successful exit. i > 0: U(i,i) is exactly zero (1-based index); the factorization has been completed, but the factor U is exactly singular.

template<typename scalar_t >

void slate::tile::getrf_nopiv	(	Tile< scalar_t >	tile,
		int64_t	ib,
		int64_t *	info
	)

Compute the LU factorization of a tile without pivoting.

Parameters

[in]	ib	internal blocking in the panel
[in,out]	tile	tile to factor
[in,out]	info	Exit status. 0: successful exit. i > 0: U(i,i) is exactly zero (1-based index); the factorization will contain NaN due to division by zero.

template<typename scalar_t >

void slate::tile::getrf_tntpiv_local	(	int64_t	diag_len,
		int64_t	ib,
		int	stage,
		std::vector< Tile< scalar_t > > &	tiles,
		std::vector< int64_t > &	tile_indices,
		std::vector< std::vector< internal::AuxPivot< scalar_t > > > &	aux_pivot,
		int	mpi_rank,
		int	thread_id,
		int	thread_size,
		ThreadBarrier &	thread_barrier,
		std::vector< scalar_t > &	max_value,
		std::vector< int64_t > &	max_index,
		std::vector< int64_t > &	max_offset,
		std::vector< scalar_t > &	top_block,
		int64_t *	info
	)

Compute the LU factorization of a local panel, for use in CALU tournament pivoting.

Parameters

[in]	diag_len	length of the panel diagonal
[in]	ib	internal blocking in the panel
[in]	stage	stage of the tree reduction
[in,out]	tiles	local tiles in the panel
[in]	tile_indices	i indices of the tiles in the panel
[in,out]	aux_pivot	pivots produced by the panel factorization, of dimension (2, mb).

For stage == 0, aux_pivot[ 0 ][ 0:mb-1 ] is used.

For stage == 1, aux_pivot[ 0 ][ 0:mb-1 ] contains pivot info for tile 0, aux_pivot[ 1 ][ 0:mb-1 ] contains pivot info for tile 1.

Parameters

[in]	mpi_rank	MPI rank in the panel factorization
[in]	thread_id	ID of this thread
[in]	thread_size	number of local threads
[in]	thread_barrier	barrier for synchronizing local threads
[out]	max_value	workspace for per-thread pivot value, of length thread_size.
[out]	max_index	workspace for per-thread pivot index, of length thread_size.
[out]	max_offset	workspace for per-thread pivot offset (pivot offset in the tile), of length thread_size.
[out]	top_block	workspace for broadcasting the top row for the geru operation and the top block for the gemm operation.