slate::MatrixStorage class, used to store the map of distributed tiles. More...

#include <MatrixStorage.hh>

Public Types
typedef Tile< scalar_t >	Tile_t

typedef TileNode< scalar_t >	TileNode_t

using	ijdev_tuple = std::tuple< int64_t, int64_t, int >

using	ij_tuple = std::tuple< int64_t, int64_t >

using	TilesMap = std::map< ij_tuple, std::shared_ptr< TileNode_t > >

Public Member Functions
	MatrixStorage (int64_t m, int64_t n, int64_t mb, int64_t nb, GridOrder order, int p, int q, MPI_Comm mpi_comm)

	MatrixStorage (int64_t mt, int64_t nt, std::function< int64_t(int64_t i)> &inTileMb, std::function< int64_t(int64_t j)> &inTileNb, std::function< int(ij_tuple ij)> &inTileRank, std::function< int(ij_tuple ij)> &inTileDevice, MPI_Comm mpi_comm)

	~MatrixStorage ()
	Destructor deletes all tiles and frees workspace buffers.

	MatrixStorage (MatrixStorage &orig)=delete

	MatrixStorage (MatrixStorage &&orig)=delete

MatrixStorage &	operator= (MatrixStorage &orig)=delete

MatrixStorage &	operator= (MatrixStorage &&orig)=delete

lapack::Queue *	comm_queue (int device)

lapack::Queue *	compute_queue (int device, int queue_index)

int	num_compute_queues ()

void	allocateBatchArrays (int64_t batch_size, int64_t num_arrays)
	Allocates batch arrays and BLAS++ queues for all devices.

void	clearBatchArrays ()
	Frees device batch arrays that were allocated by allocateBatchArrays().

int64_t	batchArraySize () const

scalar_t **	batchArrayHost (int device, int64_t batch_arrays_index)

scalar_t **	batchArrayDevice (int device, int64_t batch_arrays_index)

void	reserveHostWorkspace (int64_t num_tiles)
	Reserves num_tiles on host in allocator.

void	reserveDeviceWorkspace (int64_t num_tiles)
	Reserves num_tiles on each device in allocator.

void	ensureDeviceWorkspace (int device, int64_t num_tiles)
	Ensures there is unoccupied workspace for num_tiles on device in allocator.

void	clearWorkspace ()
	Clears all host and device workspace tiles.

void	releaseWorkspace ()
	Clears all host and device workspace tiles that are not OnHold.

scalar_t *	allocWorkspaceBuffer (int device, int size)
	Allocates a memory block on device to be used as a workspace buffer, to be released with a call to releaseWorkspaceBuffer().

void	releaseWorkspaceBuffer (scalar_t *data, int device)
	Release the memory block indicated by data on device to the memory manager.

TileNode_t &	at (ij_tuple ij)

Tile< scalar_t > *	at (ijdev_tuple ijdev)

void	erase (ijdev_tuple ijdev)
	Remove a tile instance from device and delete it unconditionally.

void	erase (ij_tuple ij)
	Remove a tile with all instances on all devices from map and delete it unconditionally.

void	release (ijdev_tuple ijdev)
	Remove a tile instance on device and delete it if it is a workspace and not OnHold.

void	freeTileMemory (Tile< scalar_t > *tile)
	Returns the tile's allocated memory and extended memory to the memory factory.

void	clear ()
	Delete all tiles.

omp_nest_lock_t *	getTilesMapLock ()
	Return pointer to tiles-map OMP lock.

bool	tileIsLocal (ij_tuple ij)

Tile< scalar_t > *	tileInsert (ijdev_tuple ijdev, TileKind, Layout layout=Layout::ColMajor)
	Inserts tile {i, j} on given device, which can be host, allocating new memory for it.

Tile< scalar_t > *	tileInsert (ijdev_tuple ijdev, scalar_t *data, int64_t lda, Layout layout=Layout::ColMajor)
	This is intended for inserting the original matrix.

bool	tileExists (ijdev_tuple ijdev)

void	tileMakeTransposable (Tile< scalar_t > *tile)
	Makes tile layout convertible by extending its data buffer.

void	tileLayoutReset (Tile< scalar_t > *tile)
	Resets the extended tile.

int64_t	tileReceiveCount (ij_tuple ij)

void	tileIncrementReceiveCount (ij_tuple ij)
	Increment tile's receive counter.

void	tileDecrementReceiveCount (ij_tuple ij, int64_t release_count=1)
	Decrement tile's receive counter.

void	tilePrepareToReceive (ij_tuple ij, int device, Layout layout)
	Ensures the tile node exists and increments the receive count.

MOSI	tileState (ijdev_tuple ijdev)
	Gets the state of the given tile.

MOSI	tileOnHold (ijdev_tuple ijdev)
	Checks whether the given tile is on hold.

void	tileUnsetHold (ijdev_tuple ijdev)
	Unsets any hold on the given tile.

Static Public Member Functions
static int	num_devices ()

Public Attributes
std::function< int64_t(int64_t i)>	tileMb

std::function< int64_t(int64_t j)>	tileNb

std::function< int(ij_tuple ij)>	tileRank

std::function< int(ij_tuple ij)>	tileDevice

Protected Member Functions
void	initQueues ()
	Initializes BLAS++ compute and communication queues on each device.

void	destroyQueues ()
	Destroys BLAS++ compute and communication queues on each device.

Friends
class	Debug

Detailed Description

template<typename scalar_t>
class slate::MatrixStorage< scalar_t >

slate::MatrixStorage class, used to store the map of distributed tiles.

Template Parameters

scalar_t Data type for the elements of the matrix

Member Function Documentation

◆ allocateBatchArrays()

template<typename scalar_t >

void slate::MatrixStorage< scalar_t >::allocateBatchArrays	(	int64_t	batch_size,
		int64_t	num_arrays
	)

Allocates batch arrays and BLAS++ queues for all devices.

If arrays are already allocated, frees and reallocates the arrays only if batch_size is larger than the existing size.

Parameters

[in]	batch_size	Allocate batch arrays as needed so that size of each batch array >= batch_size >= 0.
[in]	num_arrays	Allocate batch arrays as needed so that number of batch arrays per device >= num_arrays >= 1.

◆ allocWorkspaceBuffer()

template<typename scalar_t >

scalar_t * slate::MatrixStorage< scalar_t >::allocWorkspaceBuffer	(	int	device,
		int	size
	)

Allocates a memory block on device to be used as a workspace buffer, to be released with a call to releaseWorkspaceBuffer().

Returns: pointer to memory block on device

Parameters

[in]	device	Device ID (GPU or Host) where the memory block is needed.
[in]	size	Number of scalars needed in the memory block

◆ at() [1/2]

template<typename scalar_t >

TileNode_t & slate::MatrixStorage< scalar_t >::at ( ij_tuple ij )

inline

Returns: reference to TileNode(i, j). Throws exception if entry doesn't exist.

◆ at() [2/2]

template<typename scalar_t >

Tile< scalar_t > * slate::MatrixStorage< scalar_t >::at ( ijdev_tuple ijdev )

inline

Returns: pointer to an actual Tile object. Throws exception if entry doesn't exist.

◆ batchArrayDevice()

template<typename scalar_t >

scalar_t ** slate::MatrixStorage< scalar_t >::batchArrayDevice	(	int	device,
		int64_t	batch_arrays_index
	)

inline

Returns: the batch array on device

◆ batchArrayHost()

template<typename scalar_t >

scalar_t ** slate::MatrixStorage< scalar_t >::batchArrayHost	(	int	device,
		int64_t	batch_arrays_index
	)

inline

Returns: the batch array on host, to send to device

◆ batchArraySize()

template<typename scalar_t >

int64_t slate::MatrixStorage< scalar_t >::batchArraySize ( ) const

inline

Returns: currently allocated batch array size

◆ comm_queue()

template<typename scalar_t >

lapack::Queue * slate::MatrixStorage< scalar_t >::comm_queue ( int device )

inline

Returns: BLAS++ communication queues

Parameters

[in] device Tile's device ID.

◆ compute_queue()

template<typename scalar_t >

lapack::Queue * slate::MatrixStorage< scalar_t >::compute_queue	(	int	device,
		int	queue_index
	)

inline

Returns: BLAS++ compute queues

Parameters

[in]	device	Tile's device ID
[in]	queue_index	The index of a specific set of queues

◆ destroyQueues()

template<typename scalar_t >

void slate::MatrixStorage< scalar_t >::destroyQueues

protected

Destroys BLAS++ compute and communication queues on each device.

As this is called in the destructor, it should NOT throw exceptions.

◆ erase() [1/2]

template<typename scalar_t >

void slate::MatrixStorage< scalar_t >::erase ( ij_tuple ij )

Remove a tile with all instances on all devices from map and delete it unconditionally.

If tile's memory was allocated by SLATE, then its memory is freed back to the allocator memory pool.

◆ erase() [2/2]

template<typename scalar_t >

void slate::MatrixStorage< scalar_t >::erase ( ijdev_tuple ijdev )

Remove a tile instance from device and delete it unconditionally.

If tile node becomes empty, deletes it. If tile's memory was allocated by SLATE, then its memory is freed back to the allocator memory pool.

◆ initQueues()

template<typename scalar_t >

void slate::MatrixStorage< scalar_t >::initQueues

protected

Initializes BLAS++ compute and communication queues on each device.

Also initializes the host and device batch arrays. Called in constructor.

◆ num_compute_queues()

template<typename scalar_t >

int slate::MatrixStorage< scalar_t >::num_compute_queues ( )

inline

Returns: number of allocated BLAS++ compute queues

◆ release()

template<typename scalar_t >

void slate::MatrixStorage< scalar_t >::release ( ijdev_tuple ijdev )

Remove a tile instance on device and delete it if it is a workspace and not OnHold.

If tile node becomes empty, deletes it. If tile's memory was allocated by SLATE, then its memory is freed back to the allocator memory pool. For local tiles, it ensures that a valid copy remains. device can be AllDevices.

Note that local tiles are currently not released if doing so would leave all remaining tiles invalid, but this behavior may change in the future and should not be relied on.

◆ releaseWorkspace()

template<typename scalar_t >

void slate::MatrixStorage< scalar_t >::releaseWorkspace

Clears all host and device workspace tiles that are not OnHold.

For local tiles, it ensures that a valid copy remains.

Note that local tiles are currently not released if doing so would leave all remaining tiles invalid, but this behavior may change in the future and should not be relied on.

◆ releaseWorkspaceBuffer()

template<typename scalar_t >

void slate::MatrixStorage< scalar_t >::releaseWorkspaceBuffer	(	scalar_t *	data,
		int	device
	)

Release the memory block indicated by data on device to the memory manager.

Parameters

[in]	data	Pointer to memory block to be released.
[in]	device	Device ID (GPU or Host) where the memory block is.

◆ tileInsert() [1/2]

template<typename scalar_t >

Tile< scalar_t > * slate::MatrixStorage< scalar_t >::tileInsert	(	ijdev_tuple	ijdev,
		scalar_t *	data,
		int64_t	lda,
		Layout	layout = `Layout::ColMajor`
	)

This is intended for inserting the original matrix.

Inserts tile {i, j} on given device, which can be host, wrapping existing memory for it. Sets tile kind = TileKind::UserOwned. This will be the origin tile, thus TileNode(i, j) should not pre-exist.

Returns: Pointer to newly inserted Tile.

◆ tileInsert() [2/2]

template<typename scalar_t >

Tile< scalar_t > * slate::MatrixStorage< scalar_t >::tileInsert	(	ijdev_tuple	ijdev,
		TileKind	kind,
		Layout	layout = `Layout::ColMajor`
	)

Inserts tile {i, j} on given device, which can be host, allocating new memory for it.

Creates TileNode(i, j) if it does not already exist. Tile kind should be either TileKind::Workspace or TileKind::SlateOwned.

Returns: Pointer to newly inserted Tile.

◆ tileIsLocal()

template<typename scalar_t >

bool slate::MatrixStorage< scalar_t >::tileIsLocal ( ij_tuple ij )

inline

Returns: whether tile {i, j} is local.

◆ tileLayoutReset()

template<typename scalar_t >

void slate::MatrixStorage< scalar_t >::tileLayoutReset ( Tile< scalar_t > * tile )

Resets the extended tile.

Frees the extended buffer, returning it to the memory manager, then resets the tile's extended member fields.

Parameters

[in,out] tile Pointer to extended tile.

◆ tileMakeTransposable()

template<typename scalar_t >

void slate::MatrixStorage< scalar_t >::tileMakeTransposable ( Tile< scalar_t > * tile )

Makes tile layout convertible by extending its data buffer.

Attaches an auxiliary buffer to hold the transposed data when needed.

Parameters

[in,out] tile Pointer to the tile whose data buffer is to be extended.

◆ tilePrepareToReceive()

template<typename scalar_t >

void slate::MatrixStorage< scalar_t >::tilePrepareToReceive	(	ij_tuple	ij,
		int	device,
		Layout	layout
	)

inline

Ensures the tile node exists and increments the receive count.

If the tile node doesn't exist (i.e., no tile on any device), this inserts a new tile on the given device, which may be host. This does not currently ensure that a tile exists on the given device.

◆ tileReceiveCount()

template<typename scalar_t >

int64_t slate::MatrixStorage< scalar_t >::tileReceiveCount ( ij_tuple ij )

inline

Returns: tile's receive counter.

The documentation for this class was generated from the following file:

include/slate/internal/MatrixStorage.hh

Public Types

Public Member Functions

Static Public Member Functions

Public Attributes

Protected Member Functions

Friends

Detailed Description

Member Function Documentation

◆ allocateBatchArrays()

◆ allocWorkspaceBuffer()

◆ at() [1/2]

◆ at() [2/2]

◆ batchArrayDevice()

◆ batchArrayHost()

◆ batchArraySize()

◆ comm_queue()

◆ compute_queue()

◆ destroyQueues()

◆ erase() [1/2]

◆ erase() [2/2]

◆ initQueues()

◆ num_compute_queues()

◆ release()

◆ releaseWorkspace()

◆ releaseWorkspaceBuffer()

◆ tileInsert() [1/2]

◆ tileInsert() [2/2]

◆ tileIsLocal()

◆ tileLayoutReset()

◆ tileMakeTransposable()

◆ tilePrepareToReceive()

◆ tileReceiveCount()