31static inline uint32_t snrt_dma_start_1d(uint64_t dst, uint64_t src,
33 const uint32_t channel = 0) {
34 uint32_t dst_lo = dst & 0xFFFFFFFF;
35 uint32_t dst_hi = dst >> 32;
36 uint32_t src_lo = src & 0xFFFFFFFF;
37 uint32_t src_hi = src >> 32;
41 "dmsrc %[src_lo], %[src_hi] \n"
42 "dmdst %[dst_lo], %[dst_hi] \n"
43 "dmcpyi %[txid], %[size], (%[channel] << 2) | 0b00 \n"
45 : [ src_lo ]
"r"(src_lo), [ src_hi ]
"r"(src_hi),
46 [ dst_lo ]
"r"(dst_lo), [ dst_hi ]
"r"(dst_hi), [ size ]
"r"(size),
47 [ channel ]
"i"(channel));
59static inline uint32_t snrt_dma_start_1d(
volatile void *dst,
volatile void *src,
61 const uint32_t channel = 0) {
62 return snrt_dma_start_1d((uint64_t)dst, (uint64_t)src, size, channel);
70 asm volatile(
"dmuser %[mask], zero \n" : : [ mask ]
"r"(mask));
86static inline uint32_t snrt_dma_start_1d_mcast(uint64_t dst, uint64_t src,
87 size_t size, uint32_t mask,
88 const uint32_t channel = 0) {
90 uint32_t txid = snrt_dma_start_1d(dst, src, size, channel);
104static inline uint32_t snrt_dma_start_1d_mcast(
volatile void *dst,
105 volatile void *src,
size_t size,
107 const uint32_t channel = 0) {
108 return snrt_dma_start_1d_mcast((uint64_t)dst, (uint64_t)src, size, mask,
130static inline snrt_dma_txid_t snrt_dma_start_2d(uint64_t dst, uint64_t src,
131 size_t size,
size_t dst_stride,
134 const uint32_t channel = 0) {
135 uint32_t dst_lo = dst & 0xFFFFFFFF;
136 uint32_t dst_hi = dst >> 32;
137 uint32_t src_lo = src & 0xFFFFFFFF;
138 uint32_t src_hi = src >> 32;
142 "dmsrc %[src_lo], %[src_hi] \n"
143 "dmdst %[dst_lo], %[dst_hi] \n"
144 "dmstr %[src_stride], %[dst_stride] \n"
146 "dmcpyi %[txid], %[size], (%[channel] << 2) | 0b10 \n"
147 : [ txid ]
"=r"(txid)
148 : [ src_lo ]
"r"(src_lo), [ src_hi ]
"r"(src_hi),
149 [ dst_lo ]
"r"(dst_lo), [ dst_hi ]
"r"(dst_hi),
150 [ dst_stride ]
"r"(dst_stride), [ src_stride ]
"r"(src_stride),
151 [ repeat ]
"r"(repeat), [ size ]
"r"(size), [ channel ]
"i"(channel));
164static inline uint32_t snrt_dma_start_2d(
volatile void *dst,
volatile void *src,
165 size_t size,
size_t dst_stride,
166 size_t src_stride,
size_t repeat,
167 const uint32_t channel = 0) {
168 return snrt_dma_start_2d((uint64_t)dst, (uint64_t)src, size, dst_stride,
169 src_stride, repeat, channel);
183 const uint32_t channel = 0) {
186 "dmstati t0, (%[channel] << 2) | 0 \n"
187 "bltu t0, %[txid], 1b \n"
189 : [ txid ]
"r"(txid), [ channel ]
"i"(channel)
202static inline void snrt_dma_wait_all(
const uint32_t channel = 0) {
206 "dmstati %[busy], (%[channel] << 2) | 2 \n"
207 "bne %[busy], zero, 1b \n"
208 : [ busy ]
"=r"(busy)
209 : [ channel ]
"i"(channel));
217 for (
int c = 0; c < num_channels; c++) {
218 snrt_dma_wait_all(c);
247 uint8_t *p = (uint8_t *)ptr;
248 uint32_t nbytes = 64;
255 snrt_dma_start_2d(ptr, ptr, 64, 64, 0, len / 64);
269 size_t tile_idx,
size_t tile_size,
271 size_t tile_nbytes = tile_size * prec;
272 return snrt_dma_start_1d(
273 (uint64_t)dst, (uint64_t)src + tile_idx * tile_nbytes, tile_nbytes);
290 size_t tile_nbytes = tile_size * prec;
291 return snrt_dma_start_1d_mcast((uintptr_t)dst,
292 (uintptr_t)src + tile_idx * tile_nbytes,
305 size_t size,
size_t row_size,
307 return snrt_dma_start_2d(dst, src, row_size, stride, row_size,
320 size_t size,
size_t row_size,
322 return snrt_dma_start_2d(dst, src, row_size, row_size, stride,
335 size_t tile_idx,
size_t tile_size,
337 size_t tile_nbytes = tile_size * prec;
338 return snrt_dma_start_1d((uint64_t)dst + tile_idx * tile_nbytes,
339 (uint64_t)src, tile_nbytes);
358 void *dst,
void *src,
size_t tile_x1_idx,
size_t tile_x0_idx,
359 size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
360 uint32_t prec,
size_t tile_ld) {
361 size_t src_offset = 0;
363 src_offset += tile_x0_idx * tile_x0_size;
364 src_offset += tile_x1_idx * tile_x1_size * full_x0_size;
367 return snrt_dma_start_2d((uint64_t)dst,
368 (uint64_t)src + src_offset,
377 void *dst,
void *src,
size_t tile_x1_idx,
size_t tile_x0_idx,
378 size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
381 tile_x1_size, tile_x0_size, full_x0_size, prec,
382 tile_x0_size * prec);
402 void *dst,
void *src,
size_t tile_x1_idx,
size_t tile_x0_idx,
403 size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
404 uint32_t prec,
size_t num_banks) {
406 size_t tile_x0_size_in_banks = (num_banks * SNRT_TCDM_BANK_WIDTH) / prec;
407 size_t tile_x1_size_in_banks =
408 ceil((tile_x1_size * tile_x0_size) / (
double)tile_x0_size_in_banks);
409 size_t tile_ld = SNRT_TCDM_HYPERBANK_WIDTH;
411 tile_x1_size_in_banks, tile_x0_size_in_banks,
412 full_x0_size, prec, tile_ld);
431 void *dst,
void *src,
size_t tile_x1_idx,
size_t tile_x0_idx,
432 size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
433 uint32_t prec,
size_t tile_ld) {
434 size_t dst_offset = 0;
436 dst_offset += tile_x0_idx * tile_x0_size;
437 dst_offset += tile_x1_idx * tile_x1_size * full_x0_size;
440 return snrt_dma_start_2d((uint64_t)dst + dst_offset,
450 void *dst,
void *src,
size_t tile_x1_idx,
size_t tile_x0_idx,
451 size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
454 tile_x1_size, tile_x0_size, full_x0_size,
455 prec, tile_x0_size * prec);
459 void *dst,
void *src,
size_t tile_x1_idx,
size_t tile_x0_idx,
460 size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
461 uint32_t prec,
size_t num_banks) {
463 size_t tile_x0_size_in_banks = (num_banks * SNRT_TCDM_BANK_WIDTH) / prec;
464 size_t tile_x1_size_in_banks =
465 ceil((tile_x1_size * tile_x0_size) / (
double)tile_x0_size_in_banks);
466 size_t tile_ld = SNRT_TCDM_HYPERBANK_WIDTH;
468 tile_x1_size_in_banks, tile_x0_size_in_banks,
469 full_x0_size, prec, tile_ld);
void snrt_dma_enable_mcast(uint32_t mask)
Enable multicast for successive transfers.
Definition dma.h:69
snrt_dma_txid_t snrt_dma_load_1d_tile(volatile void *dst, volatile void *src, size_t tile_idx, size_t tile_size, uint32_t prec)
Load a tile of a 1D array.
Definition dma.h:267
snrt_dma_txid_t snrt_dma_1d_to_2d(volatile void *dst, volatile void *src, size_t size, size_t row_size, size_t stride)
Transfer and reshape a 1D array into a 2D array.
Definition dma.h:304
snrt_dma_txid_t snrt_dma_mcast_load_1d_tile(void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec, uint32_t mcast)
Load a tile of a 1D array using a multicast DMA transfer.
Definition dma.h:285
uint32_t snrt_dma_txid_t
A DMA transfer identifier.
Definition dma.h:15
snrt_dma_txid_t snrt_dma_store_2d_tile(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t tile_ld)
Store a 2D tile to a 2D array.
Definition dma.h:430
snrt_dma_txid_t snrt_dma_load_2d_tile_in_banks(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t num_banks)
Load a 2D tile of a 2D array and reshape it to occupy a subset of TCDM banks.
Definition dma.h:401
snrt_dma_txid_t snrt_dma_store_1d_tile(void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec)
Store a tile to a 1D array.
Definition dma.h:334
snrt_dma_txid_t snrt_dma_2d_to_1d(volatile void *dst, volatile void *src, size_t size, size_t row_size, size_t stride)
Transfer and reshape a 2D array into a 1D array.
Definition dma.h:319
void snrt_dma_wait_all_channels(uint32_t num_channels)
Block until the first num_channels channels are idle.
Definition dma.h:216
void snrt_dma_memset(void *ptr, uint8_t value, uint32_t len)
Fast memset function performed by DMA.
Definition dma.h:244
snrt_dma_txid_t snrt_dma_load_2d_tile(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t tile_ld)
Load a 2D tile of a 2D array.
Definition dma.h:357
void snrt_dma_start_tracking()
Start tracking of a DMA performance region. Does not have any implications on the HW...
Definition dma.h:228
void snrt_dma_disable_mcast()
Disable multicast for successive transfers.
Definition dma.h:77
void snrt_dma_stop_tracking()
Stop tracking of a DMA performance region. Does not have any implications on the HW...
Definition dma.h:236