Snitch Runtime
Loading...
Searching...
No Matches
dma.h File Reference

This file provides functions to program the Snitch DMA. More...

#include <string.h>
#include <math.h>

Go to the source code of this file.

Typedefs

typedef uint32_t snrt_dma_txid_t
 A DMA transfer identifier.
 

Functions

static uint32_t snrt_dma_start_1d (uint64_t dst, uint64_t src, size_t size, const uint32_t channel=0)
 Start an asynchronous 1D DMA transfer with 64-bit wide pointers on a specific DMA channel.
 
static uint32_t snrt_dma_start_1d (volatile void *dst, volatile void *src, size_t size, const uint32_t channel=0)
 Start an asynchronous 1D DMA transfer using native-size pointers.
 
void snrt_dma_set_awuser (uint64_t field)
 Set AW user field of the DMA's AXI interface.
 
void snrt_dma_enable_multicast (uint64_t mask)
 Enable multicast for successive transfers.
 
void snrt_dma_enable_reduction (uint64_t mask, snrt_collective_opcode_t opcode)
 Enable reduction operations for successive transfers.
 
void snrt_dma_disable_multicast ()
 Disable multicast for successive transfers.
 
void snrt_dma_disable_reduction ()
 Disable reduction operations for successive transfers.
 
static uint32_t snrt_dma_start_1d_reduction (uint64_t dst, uint64_t src, size_t size, uint64_t mask, snrt_collective_opcode_t opcode, const uint32_t channel=0)
 Start an asynchronous reduction 1D DMA transfer with 64-bit wide pointers.
 
static uint32_t snrt_dma_start_1d_reduction (uint64_t dst, uint64_t src, size_t size, snrt_comm_t comm, snrt_collective_opcode_t opcode, const uint32_t channel=0)
 Start an asynchronous reduction 1D DMA transfer with 64-bit wide pointers.
 
static uint32_t snrt_dma_start_1d_mcast (uint64_t dst, uint64_t src, size_t size, uint64_t mask, const uint32_t channel=0)
 Start an asynchronous multicast 1D DMA transfer with 64-bit wide pointers.
 
static uint32_t snrt_dma_start_1d_mcast (uint64_t dst, uint64_t src, size_t size, snrt_comm_t comm, const uint32_t channel=0)
 Start an asynchronous multicast 1D DMA transfer with 64-bit wide pointers.
 
static uint32_t snrt_dma_start_1d_reduction (volatile void *dst, volatile void *src, size_t size, uint64_t mask, snrt_collective_opcode_t opcode, const uint32_t channel=0)
 Start an asynchronous reduction 1D DMA transfer using native-size pointers.
 
static uint32_t snrt_dma_start_1d_mcast (volatile void *dst, volatile void *src, size_t size, uint64_t mask, const uint32_t channel=0)
 Start an asynchronous multicast 1D DMA transfer using native-size pointers.
 
static snrt_dma_txid_t snrt_dma_start_2d (uint64_t dst, uint64_t src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat, const uint32_t channel=0)
 Start an asynchronous 2D DMA transfer with 64-bit wide pointers.
 
static uint32_t snrt_dma_start_2d (volatile void *dst, volatile void *src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat, const uint32_t channel=0)
 Start an asynchronous 2D DMA transfer using native-size pointers.
 
static uint32_t snrt_dma_start_2d_mcast (uint64_t dst, uint64_t src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat, uint32_t mask, const uint32_t channel=0)
 Start an asynchronous, multicast 2D DMA transfer with 64-bit wide pointers.
 
static uint32_t snrt_dma_start_2d_mcast (volatile void *dst, volatile void *src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat, uint32_t mask, const uint32_t channel=0)
 Start an asynchronous, multicast 2D DMA transfer using native-size pointers.
 
static void snrt_dma_wait (snrt_dma_txid_t txid, const uint32_t channel=0)
 Block until a DMA transfer finishes on a specific DMA channel.
 
static void snrt_dma_wait_all (const uint32_t channel=0)
 Block until a specific DMA channel is idle.
 
void snrt_dma_wait_all_channels (uint32_t num_channels)
 Block until the first num_channels channels are idle.
 
void snrt_dma_start_tracking ()
 Start tracking of a DMA performance region. This has no effect on the hardware; it only injects a marker into the DMA traces for later analysis.
 
void snrt_dma_stop_tracking ()
 Stop tracking of a DMA performance region. This has no effect on the hardware; it only injects a marker into the DMA traces for later analysis.
 
void snrt_dma_memset (void *ptr, uint8_t value, uint32_t len)
 Fast memset function performed by DMA.
 
snrt_dma_txid_t snrt_dma_load_1d_tile (volatile void *dst, volatile void *src, size_t tile_idx, size_t tile_size, uint32_t prec)
 Load a tile of a 1D array.
 
snrt_dma_txid_t snrt_dma_load_1d_tile_mcast (void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec, uint64_t mask)
 Load a tile of a 1D array using multicast.
 
snrt_dma_txid_t snrt_dma_reduction_load_1d_tile (void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec, uint64_t mask, snrt_collective_opcode_t opcode)
 Load a tile of a 1D array using a reduction transfer.
 
snrt_dma_txid_t snrt_dma_1d_to_2d (volatile void *dst, volatile void *src, size_t size, size_t row_size, size_t stride)
 Transfer and reshape a 1D array into a 2D array.
 
snrt_dma_txid_t snrt_dma_2d_to_1d (volatile void *dst, volatile void *src, size_t size, size_t row_size, size_t stride)
 Transfer and reshape a 2D array into a 1D array.
 
snrt_dma_txid_t snrt_dma_store_1d_tile (void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec)
 Store a tile to a 1D array.
 
snrt_dma_txid_t snrt_dma_load_2d_tile (void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t tile_ld)
 Load a 2D tile of a 2D array.
 
snrt_dma_txid_t snrt_dma_load_2d_tile (void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec)
 Load a 2D tile of a 2D array.
 
snrt_dma_txid_t snrt_dma_load_2d_tile_mcast (void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t tile_ld, uint32_t mask)
 Load a 2D tile of a 2D array using multicast.
 
snrt_dma_txid_t snrt_dma_load_2d_tile_mcast (void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, uint32_t mask)
 Load a 2D tile of a 2D array using multicast.
 
snrt_dma_txid_t snrt_dma_load_2d_tile_mcast (void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, snrt_comm_t comm)
 Load a 2D tile of a 2D array using multicast.
 
snrt_dma_txid_t snrt_dma_load_2d_tile_in_banks (void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t num_banks)
 Load a 2D tile of a 2D array and reshape it to occupy a subset of TCDM banks.
 
snrt_dma_txid_t snrt_dma_store_2d_tile (void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t tile_ld)
 Store a 2D tile to a 2D array.
 
snrt_dma_txid_t snrt_dma_store_2d_tile (void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec)
 Store a 2D tile of a 2D array.
 
snrt_dma_txid_t snrt_dma_store_2d_tile_from_banks (void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t num_banks)
 Store a 2D tile of a 2D array from a 1D layout occupying a subset of TCDM banks.
 

Detailed Description

This file provides functions to program the Snitch DMA.

Function Documentation

◆ snrt_dma_1d_to_2d()

snrt_dma_txid_t snrt_dma_1d_to_2d ( volatile void * dst,
volatile void * src,
size_t size,
size_t row_size,
size_t stride )
inline

Transfer and reshape a 1D array into a 2D array.

Parameters
dst: Pointer to the destination array.
src: Pointer to the source array.
size: Number of bytes to transfer.
row_size: Size of a row in the 2D array, in bytes.
stride: Stride between successive rows in the 2D array, in bytes.
506 {
507 return snrt_dma_start_2d(dst, src, row_size, stride, row_size,
508 size / row_size);
509}
static snrt_dma_txid_t snrt_dma_start_2d(uint64_t dst, uint64_t src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat, const uint32_t channel=0)
Start an asynchronous 2D DMA transfer with 64-bit wide pointers.
Definition dma.h:246

◆ snrt_dma_2d_to_1d()

snrt_dma_txid_t snrt_dma_2d_to_1d ( volatile void * dst,
volatile void * src,
size_t size,
size_t row_size,
size_t stride )
inline

Transfer and reshape a 2D array into a 1D array.

Parameters
dst: Pointer to the destination array.
src: Pointer to the source array.
size: Number of bytes to transfer.
row_size: Size of a row in the 2D array, in bytes.
stride: Stride between successive rows in the 2D array, in bytes.
521 {
522 return snrt_dma_start_2d(dst, src, row_size, row_size, stride,
523 size / row_size);
524}

◆ snrt_dma_disable_multicast()

void snrt_dma_disable_multicast ( )
inline

Disable multicast for successive transfers.

Successive DMA transfers will be unicast transfers

void snrt_dma_set_awuser(uint64_t field)
Set AW user field of the DMA's AXI interface.
Definition dma.h:80

◆ snrt_dma_disable_reduction()

void snrt_dma_disable_reduction ( )
inline

Disable reduction operations for successive transfers.

Successive DMA transfers will be unicast transfers

◆ snrt_dma_enable_multicast()

void snrt_dma_enable_multicast ( uint64_t mask)
inline

Enable multicast for successive transfers.

All transfers performed after this call will be multicast to all addresses specified by the address and mask pair.

Parameters
mask: Multicast mask value.
97 {
99 op.f.opcode = SNRT_COLLECTIVE_MULTICAST;
100 op.f.mask = mask;
102}
Definition sync_decls.h:40

◆ snrt_dma_enable_reduction()

void snrt_dma_enable_reduction ( uint64_t mask,
snrt_collective_opcode_t opcode )
inline

Enable reduction operations for successive transfers.

All transfers performed after this call will be part of a reduction involving all masters identified by the mask.

Parameters
mask: Mask defining all members involved in the reduction.
opcode: Type of reduction operation.
113 {
115 op.f.opcode = opcode;
116 op.f.mask = mask;
118}

◆ snrt_dma_load_1d_tile()

snrt_dma_txid_t snrt_dma_load_1d_tile ( volatile void * dst,
volatile void * src,
size_t tile_idx,
size_t tile_size,
uint32_t prec )
inline

Load a tile of a 1D array.

Parameters
dst: Pointer to the tile destination.
src: Pointer to the source array.
tile_idx: Index of the tile in the 1D array.
tile_size: Number of elements within a tile of the 1D array.
prec: Number of bytes of each element in the 1D array.
451 {
452 size_t tile_nbytes = tile_size * prec;
453 return snrt_dma_start_1d(
454 (uint64_t)dst, (uint64_t)src + tile_idx * tile_nbytes, tile_nbytes);
455}
static uint32_t snrt_dma_start_1d(uint64_t dst, uint64_t src, size_t size, const uint32_t channel=0)
Start an asynchronous 1D DMA transfer with 64-bit wide pointers on a specific DMA channel.
Definition dma.h:35

◆ snrt_dma_load_1d_tile_mcast()

snrt_dma_txid_t snrt_dma_load_1d_tile_mcast ( void * dst,
void * src,
size_t tile_idx,
size_t tile_size,
uint32_t prec,
uint64_t mask )
inline

Load a tile of a 1D array using multicast.

Parameters
dst: Pointer to the tile destination.
src: Pointer to the source array.
tile_idx: Index of the tile in the 1D array.
tile_size: Number of elements within a tile of the 1D array.
prec: Number of bytes of each element in the 1D array.
mask: Multicast mask applied on the destination address.
470 {
471 size_t tile_nbytes = tile_size * prec;
472 return snrt_dma_start_1d_mcast((uintptr_t)dst,
473 (uintptr_t)src + tile_idx * tile_nbytes,
474 tile_nbytes, mask);
475}
static uint32_t snrt_dma_start_1d_mcast(uint64_t dst, uint64_t src, size_t size, uint64_t mask, const uint32_t channel=0)
Start an asynchronous multicast 1D DMA transfer with 64-bit wide pointers.
Definition dma.h:173

◆ snrt_dma_load_2d_tile() [1/2]

snrt_dma_txid_t snrt_dma_load_2d_tile ( void * dst,
void * src,
size_t tile_x1_idx,
size_t tile_x0_idx,
size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
uint32_t prec )
inline

Load a 2D tile of a 2D array.

The destination tile is assumed to be stored contiguously, i.e. its leading dimension is tile_x0_size * prec bytes. In other words, this behaves like snrt_dma_2d_to_1d() applied to the selected tile.

See also
snrt_dma_load_2d_tile(void *, void *, size_t, size_t, size_t, size_t, size_t, uint32_t, size_t) for a detailed description of the parameters.
588 {
589 return snrt_dma_load_2d_tile(dst, src, tile_x1_idx, tile_x0_idx,
590 tile_x1_size, tile_x0_size, full_x0_size, prec,
591 tile_x0_size * prec);
592}
snrt_dma_txid_t snrt_dma_load_2d_tile(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t tile_ld)
Load a 2D tile of a 2D array.
Definition dma.h:557

◆ snrt_dma_load_2d_tile() [2/2]

snrt_dma_txid_t snrt_dma_load_2d_tile ( void * dst,
void * src,
size_t tile_x1_idx,
size_t tile_x0_idx,
size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
uint32_t prec,
size_t tile_ld )
inline

Load a 2D tile of a 2D array.

Parameters
dst: Pointer to the tile destination.
src: Pointer to the source array.
tile_x1_idx: Outermost coordinate of the tile in the 2D array.
tile_x0_idx: Innermost coordinate of the tile in the 2D array.
tile_x1_size: Number of elements in the outermost dimension of the tile.
tile_x0_size: Number of elements in the innermost dimension of the tile.
full_x0_size: Number of elements in the innermost dimension of the array.
prec: Number of bytes of each element in the 2D array.
tile_ld: Leading dimension of the tile, in bytes.
560 {
561 size_t src_offset = 0;
562 // Advance src array in x0 and x1 dimensions, and convert to byte offset
563 src_offset += tile_x0_idx * tile_x0_size;
564 src_offset += tile_x1_idx * tile_x1_size * full_x0_size;
565 src_offset *= prec;
566 // Initiate transfer
567 return snrt_dma_start_2d((uint64_t)dst, // dst
568 (uint64_t)src + src_offset, // src
569 tile_x0_size * prec, // size
570 tile_ld, // dst_stride
571 full_x0_size * prec, // src_stride
572 tile_x1_size // repeat
573 );
574}

◆ snrt_dma_load_2d_tile_in_banks()

snrt_dma_txid_t snrt_dma_load_2d_tile_in_banks ( void * dst,
void * src,
size_t tile_x1_idx,
size_t tile_x0_idx,
size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
uint32_t prec,
size_t num_banks )
inline

Load a 2D tile of a 2D array and reshape it to occupy a subset of TCDM banks.

Parameters
dst: Pointer to the tile destination.
src: Pointer to the source array.
tile_x1_idx: Outermost coordinate of the tile in the 2D array.
tile_x0_idx: Innermost coordinate of the tile in the 2D array.
tile_x1_size: Number of elements in the outermost dimension of the tile.
tile_x0_size: Number of elements in the innermost dimension of the tile.
full_x0_size: Number of elements in the innermost dimension of the array.
prec: Number of bytes of each element in the 2D array.
num_banks: Number of banks to reshape the tile into.
678 {
679 // Calculate new tile size after reshaping the tile in the selected banks
680 size_t tile_x0_size_in_banks = (num_banks * SNRT_TCDM_BANK_WIDTH) / prec;
681 size_t tile_x1_size_in_banks =
682 ceil((tile_x1_size * tile_x0_size) / (double)tile_x0_size_in_banks);
683 size_t tile_ld = SNRT_TCDM_HYPERBANK_WIDTH;
684 return snrt_dma_load_2d_tile(dst, src, tile_x1_idx, tile_x0_idx,
685 tile_x1_size_in_banks, tile_x0_size_in_banks,
686 full_x0_size, prec, tile_ld);
687}

◆ snrt_dma_load_2d_tile_mcast() [1/3]

snrt_dma_txid_t snrt_dma_load_2d_tile_mcast ( void * dst,
void * src,
size_t tile_x1_idx,
size_t tile_x0_idx,
size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
uint32_t prec,
size_t tile_ld,
uint32_t mask )
inline

Load a 2D tile of a 2D array using multicast.

Parameters
mask: Multicast mask.
See also
snrt_dma_load_2d_tile(void *, void *, size_t, size_t, size_t, size_t, size_t, uint32_t, size_t) for a description of the other parameters.
604 {
605 size_t src_offset = 0;
606 // Advance src array in x0 and x1 dimensions, and convert to byte offset
607 src_offset += tile_x0_idx * tile_x0_size;
608 src_offset += tile_x1_idx * tile_x1_size * full_x0_size;
609 src_offset *= prec;
610 // Initiate transfer
611 return snrt_dma_start_2d_mcast((uint64_t)dst, // dst
612 (uint64_t)src + src_offset, // src
613 tile_x0_size * prec, // size
614 tile_ld, // dst_stride
615 full_x0_size * prec, // src_stride
616 tile_x1_size, // repeat
617 mask // mask
618 );
619}
static uint32_t snrt_dma_start_2d_mcast(uint64_t dst, uint64_t src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat, uint32_t mask, const uint32_t channel=0)
Start an asynchronous, multicast 2D DMA transfer with 64-bit wide pointers.
Definition dma.h:301

◆ snrt_dma_load_2d_tile_mcast() [2/3]

snrt_dma_txid_t snrt_dma_load_2d_tile_mcast ( void * dst,
void * src,
size_t tile_x1_idx,
size_t tile_x0_idx,
size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
uint32_t prec,
snrt_comm_t comm )
inline

Load a 2D tile of a 2D array using multicast.

Parameters
comm: Communicator specifying which clusters to multicast to.

The destination tile is assumed to be stored contiguously, i.e. its leading dimension is tile_x0_size * prec bytes. In other words, this is similar to snrt_dma_2d_to_1d().

See also
snrt_dma_load_2d_tile_mcast(void *, void *, size_t, size_t, size_t, size_t, size_t, uint32_t, size_t, uint32_t) for a detailed description of the parameters.
652 {
653 uint64_t mask = snrt_get_collective_mask(comm);
654 return snrt_dma_load_2d_tile_mcast(dst, src, tile_x1_idx, tile_x0_idx,
655 tile_x1_size, tile_x0_size, full_x0_size,
656 prec, mask);
657}
snrt_dma_txid_t snrt_dma_load_2d_tile_mcast(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t tile_ld, uint32_t mask)
Load a 2D tile of a 2D array using multicast.
Definition dma.h:601

◆ snrt_dma_load_2d_tile_mcast() [3/3]

snrt_dma_txid_t snrt_dma_load_2d_tile_mcast ( void * dst,
void * src,
size_t tile_x1_idx,
size_t tile_x0_idx,
size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
uint32_t prec,
uint32_t mask )
inline

Load a 2D tile of a 2D array using multicast.

The destination tile is assumed to be stored contiguously, i.e. its leading dimension is tile_x0_size * prec bytes. In other words, this is similar to snrt_dma_2d_to_1d().

See also
snrt_dma_load_2d_tile_mcast(void *, void *, size_t, size_t, size_t, size_t, size_t, uint32_t, size_t, uint32_t) for a detailed description of the parameters.
633 {
634 return snrt_dma_load_2d_tile_mcast(dst, src, tile_x1_idx, tile_x0_idx,
635 tile_x1_size, tile_x0_size, full_x0_size,
636 prec, tile_x0_size * prec, mask);
637}

◆ snrt_dma_memset()

void snrt_dma_memset ( void * ptr,
uint8_t value,
uint32_t len )
inline

Fast memset function performed by DMA.

Parameters
ptr: Pointer to the start of the region.
value: Value to set.
len: Number of bytes to set. The DMA is used only if len is larger than 64 bytes and an integer multiple of 64 bytes; otherwise all bytes are written by the core.
417 {
418#ifdef SNRT_SUPPORTS_DMA
419 // We set the first 64 bytes to the value, and then we use the DMA to copy
420 // these into the remaining memory region. DMA is used only if len is
421 // larger than 64 bytes, and an integer multiple of 64 bytes.
422 size_t n_1d_transfers = len / 64;
423 size_t use_dma = (len % 64) == 0 && len > 64;
424 uint8_t *p = (uint8_t *)ptr;
425
426 uint32_t nbytes = len < 64 || !use_dma ? len : 64;
427 while (nbytes--) {
428 *p++ = value;
429 }
430
431 if (use_dma) {
432 snrt_dma_start_2d(ptr, ptr, 64, 64, 0, n_1d_transfers);
434 }
435#else
436 memset(ptr, (int)value, len);
437#endif
438}
static void snrt_dma_wait_all(const uint32_t channel=0)
Block until a specific DMA channel is idle.
Definition dma.h:364

◆ snrt_dma_reduction_load_1d_tile()

snrt_dma_txid_t snrt_dma_reduction_load_1d_tile ( void * dst,
void * src,
size_t tile_idx,
size_t tile_size,
uint32_t prec,
uint64_t mask,
snrt_collective_opcode_t opcode )
inline

Load a tile of a 1D array using a reduction transfer.

Parameters
dst: Pointer to the tile destination.
src: Pointer to the source array.
tile_idx: Index of the tile in the 1D array.
tile_size: Number of elements within a tile of the 1D array.
prec: Number of bytes of each element in the 1D array.
mask: Mask for the reduction operation.
opcode: Reduction operation.
489 {
490 size_t tile_nbytes = tile_size * prec;
491 return snrt_dma_start_1d_reduction((uintptr_t)dst,
492 (uintptr_t)src + tile_idx * tile_nbytes,
493 tile_nbytes, mask, opcode);
494}
static uint32_t snrt_dma_start_1d_reduction(uint64_t dst, uint64_t src, size_t size, uint64_t mask, snrt_collective_opcode_t opcode, const uint32_t channel=0)
Start an asynchronous reduction 1D DMA transfer with 64-bit wide pointers.
Definition dma.h:140

◆ snrt_dma_set_awuser()

void snrt_dma_set_awuser ( uint64_t field)
inline

Set AW user field of the DMA's AXI interface.

All DMA transfers performed after this call are equipped with the given AW user field

Parameters
field: The AW user field for the AXI transfer.
80 {
81#ifdef SNRT_SUPPORTS_DMA
82 uint32_t user_low = (uint32_t)(field);
83 uint32_t user_high = (uint32_t)(field >> 32);
84 asm volatile("dmuser %[user_low], %[user_high] \n"
85 :
86 : [ user_low ] "r"(user_low), [ user_high ] "r"(user_high));
87#endif
88}

◆ snrt_dma_start_1d() [1/2]

static uint32_t snrt_dma_start_1d ( uint64_t dst,
uint64_t src,
size_t size,
const uint32_t channel = 0 )
inlinestatic

Start an asynchronous 1D DMA transfer with 64-bit wide pointers on a specific DMA channel.

Parameters
dst: The destination address.
src: The source address.
size: The size of the transfer in bytes.
channel: The index of the channel.
Returns
The DMA transfer ID.
Note
The function passes the channel argument as an immediate, thus this must be known at compile time. As a consequence, the function must use internal linkage (static keyword) and must be always inlined. This is true also for all functions invoking this function, and passing down an argument to channel.
37 {
38#ifdef SNRT_SUPPORTS_DMA
39 uint32_t dst_lo = dst & 0xFFFFFFFF;
40 uint32_t dst_hi = dst >> 32;
41 uint32_t src_lo = src & 0xFFFFFFFF;
42 uint32_t src_hi = src >> 32;
43 uint32_t txid;
44
45 asm volatile(
46 "dmsrc %[src_lo], %[src_hi] \n"
47 "dmdst %[dst_lo], %[dst_hi] \n"
48 "dmcpyi %[txid], %[size], (%[channel] << 2) | 0b00 \n"
49 : [ txid ] "=r"(txid)
50 : [ src_lo ] "r"(src_lo), [ src_hi ] "r"(src_hi),
51 [ dst_lo ] "r"(dst_lo), [ dst_hi ] "r"(dst_hi), [ size ] "r"(size),
52 [ channel ] "i"(channel));
53
54 return txid;
55#else
56 memcpy((void *)dst, (const void *)src, size);
57 return 0;
58#endif
59}

◆ snrt_dma_start_1d() [2/2]

static uint32_t snrt_dma_start_1d ( volatile void * dst,
volatile void * src,
size_t size,
const uint32_t channel = 0 )
inlinestatic

Start an asynchronous 1D DMA transfer using native-size pointers.

This is a convenience overload of snrt_dma_start_1d(uint64_t, uint64_t, size_t, uint32_t) using void* pointers.

69 {
70 return snrt_dma_start_1d((uint64_t)dst, (uint64_t)src, size, channel);
71}

◆ snrt_dma_start_1d_mcast() [1/3]

static uint32_t snrt_dma_start_1d_mcast ( uint64_t dst,
uint64_t src,
size_t size,
snrt_comm_t comm,
const uint32_t channel = 0 )
inlinestatic

Start an asynchronous multicast 1D DMA transfer with 64-bit wide pointers.

Parameters
comm: The communicator for the multicast operation.
See also
snrt_dma_start_1d(uint64_t, uint64_t, size_t, uint32_t) for a description of the other parameters.
191 {
192 uint64_t mask = snrt_get_collective_mask(comm);
193 uint32_t txid = snrt_dma_start_1d_mcast(dst, src, size, mask, channel);
194 return txid;
195}

◆ snrt_dma_start_1d_mcast() [2/3]

static uint32_t snrt_dma_start_1d_mcast ( uint64_t dst,
uint64_t src,
size_t size,
uint64_t mask,
const uint32_t channel = 0 )
inlinestatic

Start an asynchronous multicast 1D DMA transfer with 64-bit wide pointers.

Parameters
mask: The mask for the multicast operation.
See also
snrt_dma_start_1d(uint64_t, uint64_t, size_t, uint32_t) for a description of the other parameters.
175 {
177 uint32_t txid = snrt_dma_start_1d(dst, src, size, channel);
179 return txid;
180}
void snrt_dma_disable_multicast()
Disable multicast for successive transfers.
Definition dma.h:124
void snrt_dma_enable_multicast(uint64_t mask)
Enable multicast for successive transfers.
Definition dma.h:97

◆ snrt_dma_start_1d_mcast() [3/3]

static uint32_t snrt_dma_start_1d_mcast ( volatile void * dst,
volatile void * src,
size_t size,
uint64_t mask,
const uint32_t channel = 0 )
inlinestatic

Start an asynchronous multicast 1D DMA transfer using native-size pointers.

This is a convenience overload of snrt_dma_start_1d_mcast(uint64_t, uint64_t, size_t, uint64_t, uint32_t) using void* pointers.

223 {
224 return snrt_dma_start_1d_mcast((uint64_t)dst, (uint64_t)src, size, mask,
225 channel);
226}

◆ snrt_dma_start_1d_reduction() [1/3]

static uint32_t snrt_dma_start_1d_reduction ( uint64_t dst,
uint64_t src,
size_t size,
snrt_comm_t comm,
snrt_collective_opcode_t opcode,
const uint32_t channel = 0 )
inlinestatic

Start an asynchronous reduction 1D DMA transfer with 64-bit wide pointers.

Parameters
comm: The communicator for the reduction operation.
opcode: Reduction operation.
See also
snrt_dma_start_1d(uint64_t, uint64_t, size_t, uint32_t) for a description of the other parameters.
159 {
160 uint64_t mask = snrt_get_collective_mask(comm);
161 uint32_t txid =
162 snrt_dma_start_1d_reduction(dst, src, size, mask, opcode, channel);
163 return txid;
164}

◆ snrt_dma_start_1d_reduction() [2/3]

static uint32_t snrt_dma_start_1d_reduction ( uint64_t dst,
uint64_t src,
size_t size,
uint64_t mask,
snrt_collective_opcode_t opcode,
const uint32_t channel = 0 )
inlinestatic

Start an asynchronous reduction 1D DMA transfer with 64-bit wide pointers.

Parameters
mask: Mask defining all members involved in the reduction.
opcode: Reduction operation.
See also
snrt_dma_start_1d(uint64_t, uint64_t, size_t, uint32_t) for a description of the other parameters.
142 {
143 snrt_dma_enable_reduction(mask, opcode);
144 uint32_t txid = snrt_dma_start_1d(dst, src, size, channel);
146 return txid;
147}
void snrt_dma_disable_reduction()
Disable reduction operations for successive transfers.
Definition dma.h:130
void snrt_dma_enable_reduction(uint64_t mask, snrt_collective_opcode_t opcode)
Enable reduction operations for successive transfers.
Definition dma.h:112

◆ snrt_dma_start_1d_reduction() [3/3]

static uint32_t snrt_dma_start_1d_reduction ( volatile void * dst,
volatile void * src,
size_t size,
uint64_t mask,
snrt_collective_opcode_t opcode,
const uint32_t channel = 0 )
inlinestatic

Start an asynchronous reduction 1D DMA transfer using native-size pointers.

This is a convenience overload of snrt_dma_start_1d_reduction(uint64_t, uint64_t, size_t, uint64_t, snrt_collective_opcode_t, uint32_t) using void* pointers.

207 {
208 return snrt_dma_start_1d_reduction((uint64_t)dst, (uint64_t)src, size, mask,
209 opcode, channel);
210}

◆ snrt_dma_start_2d() [1/2]

static snrt_dma_txid_t snrt_dma_start_2d ( uint64_t dst,
uint64_t src,
size_t size,
size_t dst_stride,
size_t src_stride,
size_t repeat,
const uint32_t channel = 0 )
inlinestatic

Start an asynchronous 2D DMA transfer with 64-bit wide pointers.

Parameters
dst: The destination address.
src: The source address.
size: The size of every 1D transfer within the 2D transfer, in bytes.
dst_stride: The offset between consecutive 1D transfers at the destination, in bytes.
src_stride: The offset between consecutive 1D transfers at the source, in bytes.
repeat: The number of 1D transfers composing the 2D transfer.
channel: The index of the channel.
Returns
The DMA transfer ID.
Note
The function passes the channel argument as an immediate, thus this must be known at compile time. As a consequence, the function must use internal linkage (static keyword) and must be always inlined. This is true also for all functions invoking this function, and passing down an argument to channel.
250 {
251#ifdef SNRT_SUPPORTS_DMA
252 uint32_t dst_lo = dst & 0xFFFFFFFF;
253 uint32_t dst_hi = dst >> 32;
254 uint32_t src_lo = src & 0xFFFFFFFF;
255 uint32_t src_hi = src >> 32;
256 uint32_t txid;
257
258 asm volatile(
259 "dmsrc %[src_lo], %[src_hi] \n"
260 "dmdst %[dst_lo], %[dst_hi] \n"
261 "dmstr %[src_stride], %[dst_stride] \n"
262 "dmrep %[repeat] \n"
263 "dmcpyi %[txid], %[size], (%[channel] << 2) | 0b10 \n"
264 : [ txid ] "=r"(txid)
265 : [ src_lo ] "r"(src_lo), [ src_hi ] "r"(src_hi),
266 [ dst_lo ] "r"(dst_lo), [ dst_hi ] "r"(dst_hi),
267 [ dst_stride ] "r"(dst_stride), [ src_stride ] "r"(src_stride),
268 [ repeat ] "r"(repeat), [ size ] "r"(size), [ channel ] "i"(channel));
269
270 return txid;
271#else
272 // TODO(colluca): we can implement this as a series of memcpy calls
273 return 0;
274#endif
275}

◆ snrt_dma_start_2d() [2/2]

static uint32_t snrt_dma_start_2d ( volatile void * dst,
volatile void * src,
size_t size,
size_t dst_stride,
size_t src_stride,
size_t repeat,
const uint32_t channel = 0 )
inlinestatic

Start an asynchronous 2D DMA transfer using native-size pointers.

This is a convenience overload of snrt_dma_start_2d(uint64_t, uint64_t, size_t, size_t, size_t, size_t, uint32_t) using void* pointers.

287 {
288 return snrt_dma_start_2d((uint64_t)dst, (uint64_t)src, size, dst_stride,
289 src_stride, repeat, channel);
290}

◆ snrt_dma_start_2d_mcast() [1/2]

static uint32_t snrt_dma_start_2d_mcast ( uint64_t dst,
uint64_t src,
size_t size,
size_t dst_stride,
size_t src_stride,
size_t repeat,
uint32_t mask,
const uint32_t channel = 0 )
inlinestatic

Start an asynchronous, multicast 2D DMA transfer with 64-bit wide pointers.

Parameters
mask: Multicast mask.
See also
snrt_dma_start_2d(uint64_t, uint64_t, size_t, size_t, size_t, size_t, uint32_t) for a description of the other parameters.
305 {
307 uint32_t txid = snrt_dma_start_2d(dst, src, size, dst_stride, src_stride,
308 repeat, channel);
310 return txid;
311}

◆ snrt_dma_start_2d_mcast() [2/2]

static uint32_t snrt_dma_start_2d_mcast ( volatile void * dst,
volatile void * src,
size_t size,
size_t dst_stride,
size_t src_stride,
size_t repeat,
uint32_t mask,
const uint32_t channel = 0 )
inlinestatic

Start an asynchronous, multicast 2D DMA transfer using native-size pointers.

This is a convenience overload of snrt_dma_start_2d_mcast(uint64_t, uint64_t, size_t, size_t, size_t, size_t, uint32_t, uint32_t) using void* pointers.

326 {
327 return snrt_dma_start_2d_mcast((uint64_t)dst, (uint64_t)src, size,
328 dst_stride, src_stride, repeat, mask,
329 channel);
330}

◆ snrt_dma_start_tracking()

void snrt_dma_start_tracking ( )
inline

Start tracking of a DMA performance region. This has no effect on the hardware; it only injects a marker into the DMA traces for later analysis.

Deprecated
392 {
393#ifdef SNRT_SUPPORTS_DMA
394 asm volatile("dmstati zero, 0 \n");
395#endif
396}

◆ snrt_dma_stop_tracking()

void snrt_dma_stop_tracking ( )
inline

Stop tracking of a DMA performance region. This has no effect on the hardware; it only injects a marker into the DMA traces for later analysis.

Deprecated
404 {
405#ifdef SNRT_SUPPORTS_DMA
406 asm volatile("dmstati zero, 0 \n");
407#endif
408}

◆ snrt_dma_store_1d_tile()

snrt_dma_txid_t snrt_dma_store_1d_tile ( void * dst,
void * src,
size_t tile_idx,
size_t tile_size,
uint32_t prec )
inline

Store a tile to a 1D array.

Parameters
dstPointer to the destination array.
srcPointer to the source tile.
tile_idxIndex of the tile in the 1D array.
tile_sizeNumber of elements within a tile of the 1D array.
precNumber of bytes of each element in the 1D array.
536 {
537 size_t tile_nbytes = tile_size * prec;
538 return snrt_dma_start_1d((uint64_t)dst + tile_idx * tile_nbytes,
539 (uint64_t)src, tile_nbytes);
540}

◆ snrt_dma_store_2d_tile() [1/2]

snrt_dma_txid_t snrt_dma_store_2d_tile ( void * dst,
void * src,
size_t tile_x1_idx,
size_t tile_x0_idx,
size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
uint32_t prec )
inline

Store a 2D tile to a 2D array.

The source tile is assumed to be stored contiguously, i.e. its leading dimension is that of a 1D tile (tile_x0_size * prec bytes). In other words, this is the same as snrt_dma_1d_to_2d().

See also
snrt_dma_store_2d_tile(void *, void *, size_t, size_t, size_t, size_t, size_t, uint32_t, size_t) for a detailed description of the parameters.
735 {
736 return snrt_dma_store_2d_tile(dst, src, tile_x1_idx, tile_x0_idx,
737 tile_x1_size, tile_x0_size, full_x0_size,
738 prec, tile_x0_size * prec);
739}
snrt_dma_txid_t snrt_dma_store_2d_tile(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t tile_ld)
Store a 2D tile to a 2D array.
Definition dma.h:704

◆ snrt_dma_store_2d_tile() [2/2]

snrt_dma_txid_t snrt_dma_store_2d_tile ( void * dst,
void * src,
size_t tile_x1_idx,
size_t tile_x0_idx,
size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
uint32_t prec,
size_t tile_ld )
inline

Store a 2D tile to a 2D array.

Parameters
dst: Pointer to the destination array.
src: Pointer to the source tile.
tile_x1_idx: Outermost coordinate of the tile in the 2D array.
tile_x0_idx: Innermost coordinate of the tile in the 2D array.
tile_x1_size: Number of elements in the outermost dimension of the tile.
tile_x0_size: Number of elements in the innermost dimension of the tile.
full_x0_size: Number of elements in the innermost dimension of the array.
prec: Number of bytes of each element in the 2D array.
tile_ld: Leading dimension of the tile, in bytes.
707 {
708 size_t dst_offset = 0;
709 // Advance dst array in x0 and x1 dimensions, and convert to byte offset
710 dst_offset += tile_x0_idx * tile_x0_size;
711 dst_offset += tile_x1_idx * tile_x1_size * full_x0_size;
712 dst_offset *= prec;
713 // Initiate transfer
714 return snrt_dma_start_2d((uint64_t)dst + dst_offset, // dst
715 (uint64_t)src, // src
716 tile_x0_size * prec, // size
717 full_x0_size * prec, // dst_stride
718 tile_ld, // src_stride
719 tile_x1_size // repeat
720 );
721}

◆ snrt_dma_store_2d_tile_from_banks()

snrt_dma_txid_t snrt_dma_store_2d_tile_from_banks ( void * dst,
void * src,
size_t tile_x1_idx,
size_t tile_x0_idx,
size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
uint32_t prec,
size_t num_banks )
inline

Store a 2D tile to a 2D array from a 1D layout occupying a subset of TCDM banks.

Parameters
dst: Pointer to the destination array.
src: Pointer to the source tile.
tile_x1_idx: Outermost coordinate of the tile in the 2D array.
tile_x0_idx: Innermost coordinate of the tile in the 2D array.
tile_x1_size: Number of elements in the outermost dimension of the tile.
tile_x0_size: Number of elements in the innermost dimension of the tile.
full_x0_size: Number of elements in the innermost dimension of the array.
prec: Number of bytes of each element in the 2D array.
num_banks: Number of banks the tile is stored in.
760 {
761 // Calculate new tile size after reshaping the tile in the selected banks
762 size_t tile_x0_size_in_banks = (num_banks * SNRT_TCDM_BANK_WIDTH) / prec;
763 size_t tile_x1_size_in_banks =
764 ceil((tile_x1_size * tile_x0_size) / (double)tile_x0_size_in_banks);
765 size_t tile_ld = SNRT_TCDM_HYPERBANK_WIDTH;
766 return snrt_dma_store_2d_tile(dst, src, tile_x1_idx, tile_x0_idx,
767 tile_x1_size_in_banks, tile_x0_size_in_banks,
768 full_x0_size, prec, tile_ld);
769}

◆ snrt_dma_wait()

static void snrt_dma_wait ( snrt_dma_txid_t txid,
const uint32_t channel = 0 )
inline static

Block until a DMA transfer finishes on a specific DMA channel.

Parameters
txid: The DMA transfer's ID.
channel: The index of the channel.
Note
The function passes the channel argument to the instruction as an immediate, so its value must be known at compile time. As a consequence, the function must have internal linkage (static keyword) and must always be inlined. The same applies to every function that invokes this function and passes one of its own arguments down to channel.
343 {
344#ifdef SNRT_SUPPORTS_DMA
345 asm volatile(
346 "1: \n"
347 "dmstati t0, (%[channel] << 2) | 0 \n"
348 "bltu t0, %[txid], 1b \n"
349 :
350 : [ txid ] "r"(txid), [ channel ] "i"(channel)
351 : "t0");
352#endif
353}

◆ snrt_dma_wait_all()

static void snrt_dma_wait_all ( const uint32_t channel = 0)
inline static

Block until a specific DMA channel is idle.

Parameters
channel: The index of the channel.
Note
The function passes the channel argument to the instruction as an immediate, so its value must be known at compile time. As a consequence, the function must have internal linkage (static keyword) and must always be inlined. The same applies to every function that invokes this function and passes one of its own arguments down to channel.
364 {
365#ifdef SNRT_SUPPORTS_DMA
366 uint32_t busy;
367 asm volatile(
368 "1: \n"
369 "dmstati %[busy], (%[channel] << 2) | 2 \n"
370 "bne %[busy], zero, 1b \n"
371 : [ busy ] "=r"(busy)
372 : [ channel ] "i"(channel));
373#endif
374}

◆ snrt_dma_wait_all_channels()

void snrt_dma_wait_all_channels ( uint32_t num_channels)
inline

Block until the first num_channels channels are idle.

Parameters
num_channels: The number of channels to wait on.
380 {
381 for (int c = 0; c < num_channels; c++) {
383 }
384}