33 const uint32_t channel = 0) {
34 uint32_t dst_lo = dst & 0xFFFFFFFF;
35 uint32_t dst_hi = dst >> 32;
36 uint32_t src_lo = src & 0xFFFFFFFF;
37 uint32_t src_hi = src >> 32;
41 "dmsrc %[src_lo], %[src_hi] \n"
42 "dmdst %[dst_lo], %[dst_hi] \n"
43 "dmcpyi %[txid], %[size], (%[channel] << 2) | 0b00 \n"
45 : [ src_lo ]
"r"(src_lo), [ src_hi ]
"r"(src_hi),
46 [ dst_lo ]
"r"(dst_lo), [ dst_hi ]
"r"(dst_hi), [ size ]
"r"(size),
47 [ channel ]
"i"(channel));
60 const uint32_t channel = 0) {
69 asm volatile(
"dmuser %[mask], zero \n" : : [ mask ]
"r"(mask));
86 size_t size, uint32_t mask,
87 const uint32_t channel = 0) {
103 volatile void *src,
size_t size,
105 const uint32_t channel = 0) {
129 size_t size,
size_t dst_stride,
132 const uint32_t channel = 0) {
133 uint32_t dst_lo = dst & 0xFFFFFFFF;
134 uint32_t dst_hi = dst >> 32;
135 uint32_t src_lo = src & 0xFFFFFFFF;
136 uint32_t src_hi = src >> 32;
140 "dmsrc %[src_lo], %[src_hi] \n"
141 "dmdst %[dst_lo], %[dst_hi] \n"
142 "dmstr %[src_stride], %[dst_stride] \n"
144 "dmcpyi %[txid], %[size], (%[channel] << 2) | 0b10 \n"
145 : [ txid ]
"=r"(txid)
146 : [ src_lo ]
"r"(src_lo), [ src_hi ]
"r"(src_hi),
147 [ dst_lo ]
"r"(dst_lo), [ dst_hi ]
"r"(dst_hi),
148 [ dst_stride ]
"r"(dst_stride), [ src_stride ]
"r"(src_stride),
149 [ repeat ]
"r"(repeat), [ size ]
"r"(size), [ channel ]
"i"(channel));
162 size_t size,
size_t dst_stride,
163 size_t src_stride,
size_t repeat,
164 const uint32_t channel = 0) {
166 src_stride, repeat, channel);
179 size_t size,
size_t dst_stride,
180 size_t src_stride,
size_t repeat,
182 const uint32_t channel = 0) {
199 volatile void *src,
size_t size,
201 size_t src_stride,
size_t repeat,
203 const uint32_t channel = 0) {
205 dst_stride, src_stride, repeat, mask,
220 const uint32_t channel = 0) {
223 "dmstati t0, (%[channel] << 2) | 0 \n"
224 "bltu t0, %[txid], 1b \n"
226 : [ txid ]
"r"(txid), [ channel ]
"i"(channel)
243 "dmstati %[busy], (%[channel] << 2) | 2 \n"
244 "bne %[busy], zero, 1b \n"
245 : [ busy ]
"=r"(busy)
246 : [ channel ]
"i"(channel));
254 for (
int c = 0; c < num_channels; c++) {
286 size_t n_1d_transfers = len / 64;
287 size_t use_dma = (len % 64) == 0 && len > 64;
288 uint8_t *p = (uint8_t *)ptr;
290 uint32_t nbytes = len < 64 || !use_dma ? len : 64;
311 size_t tile_idx,
size_t tile_size,
313 size_t tile_nbytes = tile_size * prec;
315 (uint64_t)dst, (uint64_t)src + tile_idx * tile_nbytes, tile_nbytes);
332 size_t tile_nbytes = tile_size * prec;
334 (uintptr_t)src + tile_idx * tile_nbytes,
347 size_t size,
size_t row_size,
362 size_t size,
size_t row_size,
377 size_t tile_idx,
size_t tile_size,
379 size_t tile_nbytes = tile_size * prec;
381 (uint64_t)src, tile_nbytes);
400 void *dst,
void *src,
size_t tile_x1_idx,
size_t tile_x0_idx,
401 size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
402 uint32_t prec,
size_t tile_ld) {
403 size_t src_offset = 0;
405 src_offset += tile_x0_idx * tile_x0_size;
406 src_offset += tile_x1_idx * tile_x1_size * full_x0_size;
410 (uint64_t)src + src_offset,
428 void *dst,
void *src,
size_t tile_x1_idx,
size_t tile_x0_idx,
429 size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
432 tile_x1_size, tile_x0_size, full_x0_size, prec,
433 tile_x0_size * prec);
444 void *dst,
void *src,
size_t tile_x1_idx,
size_t tile_x0_idx,
445 size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
446 uint32_t prec,
size_t tile_ld, uint32_t mask) {
447 size_t src_offset = 0;
449 src_offset += tile_x0_idx * tile_x0_size;
450 src_offset += tile_x1_idx * tile_x1_size * full_x0_size;
454 (uint64_t)src + src_offset,
473 void *dst,
void *src,
size_t tile_x1_idx,
size_t tile_x0_idx,
474 size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
475 uint32_t prec, uint32_t mask) {
477 tile_x1_size, tile_x0_size, full_x0_size,
478 prec, tile_x0_size * prec, mask);
498 void *dst,
void *src,
size_t tile_x1_idx,
size_t tile_x0_idx,
499 size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
500 uint32_t prec,
size_t num_banks) {
502 size_t tile_x0_size_in_banks = (num_banks * SNRT_TCDM_BANK_WIDTH) / prec;
503 size_t tile_x1_size_in_banks =
504 ceil((tile_x1_size * tile_x0_size) / (
double)tile_x0_size_in_banks);
505 size_t tile_ld = SNRT_TCDM_HYPERBANK_WIDTH;
507 tile_x1_size_in_banks, tile_x0_size_in_banks,
508 full_x0_size, prec, tile_ld);
527 void *dst,
void *src,
size_t tile_x1_idx,
size_t tile_x0_idx,
528 size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
529 uint32_t prec,
size_t tile_ld) {
530 size_t dst_offset = 0;
532 dst_offset += tile_x0_idx * tile_x0_size;
533 dst_offset += tile_x1_idx * tile_x1_size * full_x0_size;
555 void *dst,
void *src,
size_t tile_x1_idx,
size_t tile_x0_idx,
556 size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
559 tile_x1_size, tile_x0_size, full_x0_size,
560 prec, tile_x0_size * prec);
580 void *dst,
void *src,
size_t tile_x1_idx,
size_t tile_x0_idx,
581 size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
582 uint32_t prec,
size_t num_banks) {
584 size_t tile_x0_size_in_banks = (num_banks * SNRT_TCDM_BANK_WIDTH) / prec;
585 size_t tile_x1_size_in_banks =
586 ceil((tile_x1_size * tile_x0_size) / (
double)tile_x0_size_in_banks);
587 size_t tile_ld = SNRT_TCDM_HYPERBANK_WIDTH;
589 tile_x1_size_in_banks, tile_x0_size_in_banks,
590 full_x0_size, prec, tile_ld);
void snrt_dma_enable_mcast(uint32_t mask)
Enable multicast for successive transfers.
Definition dma.h:68
snrt_dma_txid_t snrt_dma_load_1d_tile(volatile void *dst, volatile void *src, size_t tile_idx, size_t tile_size, uint32_t prec)
Load a tile of a 1D array.
Definition dma.h:309
snrt_dma_txid_t snrt_dma_1d_to_2d(volatile void *dst, volatile void *src, size_t size, size_t row_size, size_t stride)
Transfer and reshape a 1D array into a 2D array.
Definition dma.h:346
static snrt_dma_txid_t snrt_dma_start_2d(uint64_t dst, uint64_t src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat, const uint32_t channel=0)
Start an asynchronous 2D DMA transfer with 64-bit wide pointers.
Definition dma.h:128
snrt_dma_txid_t snrt_dma_store_2d_tile_from_banks(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t num_banks)
Store a 2D tile of a 2D array from a 1D layout occupying a subset of TCDM banks.
Definition dma.h:579
static uint32_t snrt_dma_start_1d(uint64_t dst, uint64_t src, size_t size, const uint32_t channel=0)
Start an asynchronous 1D DMA transfer with 64-bit wide pointers on a specific DMA channel.
Definition dma.h:31
uint32_t snrt_dma_txid_t
A DMA transfer identifier.
Definition dma.h:15
snrt_dma_txid_t snrt_dma_store_2d_tile(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t tile_ld)
Store a 2D tile to a 2D array.
Definition dma.h:526
snrt_dma_txid_t snrt_dma_load_2d_tile_in_banks(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t num_banks)
Load a 2D tile of a 2D array and reshape it to occupy a subset of TCDM banks.
Definition dma.h:497
snrt_dma_txid_t snrt_dma_store_1d_tile(void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec)
Store a tile to a 1D array.
Definition dma.h:376
static uint32_t snrt_dma_start_2d_mcast(uint64_t dst, uint64_t src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat, uint32_t mask, const uint32_t channel=0)
Start an asynchronous, multicast 2D DMA transfer with 64-bit wide pointers.
Definition dma.h:178
snrt_dma_txid_t snrt_dma_2d_to_1d(volatile void *dst, volatile void *src, size_t size, size_t row_size, size_t stride)
Transfer and reshape a 2D array into a 1D array.
Definition dma.h:361
void snrt_dma_wait_all_channels(uint32_t num_channels)
Block until the first num_channels channels are idle.
Definition dma.h:253
snrt_dma_txid_t snrt_dma_load_1d_tile_mcast(void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec, uint32_t mcast)
Load a tile of a 1D array using multicast.
Definition dma.h:327
static uint32_t snrt_dma_start_1d_mcast(uint64_t dst, uint64_t src, size_t size, uint32_t mask, const uint32_t channel=0)
Start an asynchronous multicast 1D DMA transfer with 64-bit wide pointers.
Definition dma.h:85
void snrt_dma_memset(void *ptr, uint8_t value, uint32_t len)
Fast memset function performed by DMA.
Definition dma.h:282
static void snrt_dma_wait(snrt_dma_txid_t txid, const uint32_t channel=0)
Block until a DMA transfer finishes on a specific DMA channel.
Definition dma.h:219
snrt_dma_txid_t snrt_dma_load_2d_tile(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t tile_ld)
Load a 2D tile of a 2D array.
Definition dma.h:399
static void snrt_dma_wait_all(const uint32_t channel=0)
Block until a specific DMA channel is idle.
Definition dma.h:239
void snrt_dma_start_tracking()
Start tracking of a DMA performance region. Does not have any implications on the HW...
Definition dma.h:265
void snrt_dma_disable_mcast()
Disable multicast for successive transfers.
Definition dma.h:76
snrt_dma_txid_t snrt_dma_load_2d_tile_mcast(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t tile_ld, uint32_t mask)
Load a 2D tile of a 2D array using multicast.
Definition dma.h:443
void snrt_dma_stop_tracking()
Stop tracking of a DMA performance region. Does not have any implications on the HW...
Definition dma.h:273