SLATE 2024.05.31
Software for Linear Algebra Targeting Exascale
Loading...
Searching...
No Matches
slate::MatrixStorage< scalar_t > Class Template Reference

slate::MatrixStorage class, used to store the map of distributed tiles. More...

#include <MatrixStorage.hh>

Public Types

typedef Tile< scalar_t > Tile_t
 
typedef TileNode< scalar_t > TileNode_t
 
using ijdev_tuple = std::tuple< int64_t, int64_t, int >
 
using ij_tuple = std::tuple< int64_t, int64_t >
 
using TilesMap = std::map< ij_tuple, std::shared_ptr< TileNode_t > >
 

Public Member Functions

 MatrixStorage (int64_t m, int64_t n, int64_t mb, int64_t nb, GridOrder order, int p, int q, MPI_Comm mpi_comm)
 
 MatrixStorage (int64_t mt, int64_t nt, std::function< int64_t(int64_t i)> &inTileMb, std::function< int64_t(int64_t j)> &inTileNb, std::function< int(ij_tuple ij)> &inTileRank, std::function< int(ij_tuple ij)> &inTileDevice, MPI_Comm mpi_comm)
 
 ~MatrixStorage ()
 Destructor deletes all tiles and frees workspace buffers.
 
 MatrixStorage (MatrixStorage &orig)=delete
 
 MatrixStorage (MatrixStorage &&orig)=delete
 
MatrixStorage & operator= (MatrixStorage &orig)=delete
 
MatrixStorage & operator= (MatrixStorage &&orig)=delete
 
lapack::Queue * comm_queue (int device)
 
lapack::Queue * compute_queue (int device, int queue_index)
 
int num_compute_queues ()
 
void allocateBatchArrays (int64_t batch_size, int64_t num_arrays)
 Allocates batch arrays and BLAS++ queues for all devices.
 
void clearBatchArrays ()
 Frees device batch arrays that were allocated by allocateBatchArrays().
 
int64_t batchArraySize () const
 
scalar_t ** batchArrayHost (int device, int64_t batch_arrays_index)
 
scalar_t ** batchArrayDevice (int device, int64_t batch_arrays_index)
 
void reserveHostWorkspace (int64_t num_tiles)
 Reserves num_tiles on host in allocator.
 
void reserveDeviceWorkspace (int64_t num_tiles)
 Reserves num_tiles on each device in allocator.
 
void ensureDeviceWorkspace (int device, int64_t num_tiles)
 Ensures there is unoccupied workspace for num_tiles on device in allocator.
 
void clearWorkspace ()
 Clears all host and device workspace tiles.
 
void releaseWorkspace ()
 Clears all host and device workspace tiles that are not OnHold.
 
scalar_t * allocWorkspaceBuffer (int device, int size)
 Allocates a memory block on device to be used as a workspace buffer, to be released with a call to releaseWorkspaceBuffer().
 
void releaseWorkspaceBuffer (scalar_t *data, int device)
 Release the memory block indicated by data on device to the memory manager.
 
TileNode_t & at (ij_tuple ij)
 
Tile< scalar_t > * at (ijdev_tuple ijdev)
 
void erase (ijdev_tuple ijdev)
 Remove a tile instance from device and delete it unconditionally.
 
void erase (ij_tuple ij)
 Remove a tile with all instances on all devices from map and delete it unconditionally.
 
void release (ijdev_tuple ijdev)
 Remove a tile instance on device and delete it if it is a workspace and not OnHold.
 
void freeTileMemory (Tile< scalar_t > *tile)
 Returns the tile's allocated memory and extended memory to the memory factory.
 
void clear ()
 Delete all tiles.
 
omp_nest_lock_t * getTilesMapLock ()
 Return pointer to tiles-map OMP lock.
 
bool tileIsLocal (ij_tuple ij)
 
Tile< scalar_t > * tileInsert (ijdev_tuple ijdev, TileKind, Layout layout=Layout::ColMajor)
 Inserts tile {i, j} on given device, which can be host, allocating new memory for it.
 
Tile< scalar_t > * tileInsert (ijdev_tuple ijdev, scalar_t *data, int64_t lda, Layout layout=Layout::ColMajor)
 This is intended for inserting the original matrix.
 
bool tileExists (ijdev_tuple ijdev)
 
void tileMakeTransposable (Tile< scalar_t > *tile)
 Makes tile layout convertible by extending its data buffer.
 
void tileLayoutReset (Tile< scalar_t > *tile)
 Resets the extended tile.
 
int64_t tileReceiveCount (ij_tuple ij)
 
void tileIncrementReceiveCount (ij_tuple ij)
 Increment tile's receive counter.
 
void tileDecrementReceiveCount (ij_tuple ij, int64_t release_count=1)
 Decrement tile's receive counter.
 
void tilePrepareToReceive (ij_tuple ij, int device, Layout layout)
 Ensures the tile node exists and increments the receive count.
 
MOSI tileState (ijdev_tuple ijdev)
 Gets the state of the given tile.
 
MOSI tileOnHold (ijdev_tuple ijdev)
 Checks whether the given tile is on hold.
 
void tileUnsetHold (ijdev_tuple ijdev)
 Unsets any hold on the given tile.
 

Static Public Member Functions

static int num_devices ()
 

Public Attributes

std::function< int64_t(int64_t i)> tileMb
 
std::function< int64_t(int64_t j)> tileNb
 
std::function< int(ij_tuple ij)> tileRank
 
std::function< int(ij_tuple ij)> tileDevice
 

Protected Member Functions

void initQueues ()
 Initializes BLAS++ compute and communication queues on each device.
 
void destroyQueues ()
 Destroys BLAS++ compute and communication queues on each device.
 

Friends

class Debug
 

Detailed Description

template<typename scalar_t>
class slate::MatrixStorage< scalar_t >

slate::MatrixStorage class, used to store the map of distributed tiles.

Template Parameters
scalar_t: Data type for the elements of the matrix.

Member Function Documentation

◆ allocateBatchArrays()

template<typename scalar_t >
void slate::MatrixStorage< scalar_t >::allocateBatchArrays ( int64_t  batch_size,
int64_t  num_arrays 
)

Allocates batch arrays and BLAS++ queues for all devices.

If arrays are already allocated, frees and reallocates the arrays only if batch_size is larger than the existing size.

Parameters
[in] batch_size: Allocate batch arrays as needed so that size of each batch array >= batch_size >= 0.
[in] num_arrays: Allocate batch arrays as needed so that number of batch arrays per device >= num_arrays >= 1.

◆ allocWorkspaceBuffer()

template<typename scalar_t >
scalar_t * slate::MatrixStorage< scalar_t >::allocWorkspaceBuffer ( int  device,
int  size 
)

Allocates a memory block on device to be used as a workspace buffer, to be released with a call to releaseWorkspaceBuffer().

Returns
pointer to memory block on device
Parameters
[in] device: Device ID (GPU or Host) where the memory block is needed.
[in] size: Number of scalars needed in the memory block.

◆ at() [1/2]

template<typename scalar_t >
TileNode_t & slate::MatrixStorage< scalar_t >::at ( ij_tuple  ij)
inline
Returns
reference to TileNode(i, j). Throws exception if entry doesn't exist.

◆ at() [2/2]

template<typename scalar_t >
Tile< scalar_t > * slate::MatrixStorage< scalar_t >::at ( ijdev_tuple  ijdev)
inline
Returns
pointer to an actual Tile object. Throws exception if entry doesn't exist.

◆ batchArrayDevice()

template<typename scalar_t >
scalar_t ** slate::MatrixStorage< scalar_t >::batchArrayDevice ( int  device,
int64_t  batch_arrays_index 
)
inline
Returns
the batch array on device

◆ batchArrayHost()

template<typename scalar_t >
scalar_t ** slate::MatrixStorage< scalar_t >::batchArrayHost ( int  device,
int64_t  batch_arrays_index 
)
inline
Returns
the batch array on host, to send to device

◆ batchArraySize()

template<typename scalar_t >
int64_t slate::MatrixStorage< scalar_t >::batchArraySize ( ) const
inline
Returns
currently allocated batch array size

◆ comm_queue()

template<typename scalar_t >
lapack::Queue * slate::MatrixStorage< scalar_t >::comm_queue ( int  device)
inline
Returns
BLAS++ communication queues
Parameters
[in] device: Tile's device ID.

◆ compute_queue()

template<typename scalar_t >
lapack::Queue * slate::MatrixStorage< scalar_t >::compute_queue ( int  device,
int  queue_index 
)
inline
Returns
BLAS++ compute queues
Parameters
[in] device: Tile's device ID.
[in] queue_index: The index of a specific set of queues.

◆ destroyQueues()

template<typename scalar_t >
void slate::MatrixStorage< scalar_t >::destroyQueues
protected

Destroys BLAS++ compute and communication queues on each device.

As this is called in the destructor, it should NOT throw exceptions.

◆ erase() [1/2]

template<typename scalar_t >
void slate::MatrixStorage< scalar_t >::erase ( ij_tuple  ij)

Remove a tile with all instances on all devices from map and delete it unconditionally.

If tile's memory was allocated by SLATE, then its memory is freed back to the allocator memory pool.

◆ erase() [2/2]

template<typename scalar_t >
void slate::MatrixStorage< scalar_t >::erase ( ijdev_tuple  ijdev)

Remove a tile instance from device and delete it unconditionally.

If tile node becomes empty, deletes it. If tile's memory was allocated by SLATE, then its memory is freed back to the allocator memory pool.

◆ initQueues()

template<typename scalar_t >
void slate::MatrixStorage< scalar_t >::initQueues
protected

Initializes BLAS++ compute and communication queues on each device.

Also initializes the host and device batch arrays. Called in constructor.

◆ num_compute_queues()

template<typename scalar_t >
int slate::MatrixStorage< scalar_t >::num_compute_queues ( )
inline
Returns
number of allocated BLAS++ compute queues

◆ release()

template<typename scalar_t >
void slate::MatrixStorage< scalar_t >::release ( ijdev_tuple  ijdev)

Remove a tile instance on device and delete it if it is a workspace and not OnHold.

If tile node becomes empty, deletes it. If tile's memory was allocated by SLATE, then its memory is freed back to the allocator memory pool. For local tiles, it ensures that a valid copy remains. device can be AllDevices.

Note that local tiles are currently not released if it would leave all remaining tiles invalid, but this behavior may change in the future and should not be relied on.

◆ releaseWorkspace()

template<typename scalar_t >
void slate::MatrixStorage< scalar_t >::releaseWorkspace

Clears all host and device workspace tiles that are not OnHold.

For local tiles, it ensures that a valid copy remains.

Note that local tiles are currently not released if it would leave all remaining tiles invalid, but this behavior may change in the future and should not be relied on.

◆ releaseWorkspaceBuffer()

template<typename scalar_t >
void slate::MatrixStorage< scalar_t >::releaseWorkspaceBuffer ( scalar_t *  data,
int  device 
)

Release the memory block indicated by data on device to the memory manager.

Parameters
[in] data: Pointer to memory block to be released.
[in] device: Device ID (GPU or Host) where the memory block is.

◆ tileInsert() [1/2]

template<typename scalar_t >
Tile< scalar_t > * slate::MatrixStorage< scalar_t >::tileInsert ( ijdev_tuple  ijdev,
scalar_t *  data,
int64_t  lda,
Layout  layout = Layout::ColMajor 
)

This is intended for inserting the original matrix.

Inserts tile {i, j} on given device, which can be host, wrapping existing memory for it. Sets tile kind = TileKind::UserOwned. This will be the origin tile, thus TileNode(i, j) should not pre-exist.

Returns
Pointer to newly inserted Tile.

◆ tileInsert() [2/2]

template<typename scalar_t >
Tile< scalar_t > * slate::MatrixStorage< scalar_t >::tileInsert ( ijdev_tuple  ijdev,
TileKind  kind,
Layout  layout = Layout::ColMajor 
)

Inserts tile {i, j} on given device, which can be host, allocating new memory for it.

Creates TileNode(i, j) if it does not already exist. Tile kind should be either TileKind::Workspace or TileKind::SlateOwned.

Returns
Pointer to newly inserted Tile.

◆ tileIsLocal()

template<typename scalar_t >
bool slate::MatrixStorage< scalar_t >::tileIsLocal ( ij_tuple  ij)
inline
Returns
whether tile {i, j} is local.

◆ tileLayoutReset()

template<typename scalar_t >
void slate::MatrixStorage< scalar_t >::tileLayoutReset ( Tile< scalar_t > *  tile)

Resets the extended tile.

Frees the extended buffer, returning it to the memory manager, then resets the tile's extended member fields.

Parameters
[in,out] tile: Pointer to extended tile.

◆ tileMakeTransposable()

template<typename scalar_t >
void slate::MatrixStorage< scalar_t >::tileMakeTransposable ( Tile< scalar_t > *  tile)

Makes tile layout convertible by extending its data buffer.

Attaches an auxiliary buffer to hold the transposed data when needed.

Parameters
[in,out] tile: Pointer to tile to extend its data buffer.

◆ tilePrepareToReceive()

template<typename scalar_t >
void slate::MatrixStorage< scalar_t >::tilePrepareToReceive ( ij_tuple  ij,
int  device,
Layout  layout 
)
inline

Ensures the tile node exists and increments the receive count.

If the tile node doesn't exist (i.e., no tile on any device), this inserts a new tile on the given device, which may be host. This does not currently ensure that a tile exists on the given device.

◆ tileReceiveCount()

template<typename scalar_t >
int64_t slate::MatrixStorage< scalar_t >::tileReceiveCount ( ij_tuple  ij)
inline
Returns
tile's receive counter.

The documentation for this class was generated from the following file: