// Field values for the DMA instruction words that this file emits through
// `.word` directives built with R_TYPE_ENCODE(funct7, rs2, rs1, funct3, rd,
// opcode). The 0b binary literals are a compiler extension before C23.

// Opcode field: passed as the `opcode` argument of every complete
// R_TYPE_ENCODE call visible in this file.
12#define OP_CUSTOM1 0b0101011
// funct3 field: passed as the `funct3` argument of every complete
// R_TYPE_ENCODE call visible in this file.
13#define XDMA_FUNCT3 0b000
// funct7 selectors, one per DMA instruction.
// DMSRC: issued with the high/low halves of the source address as operands.
14#define DMSRC_FUNCT7 0b0000000
// DMDST: issued with the high/low halves of the destination address as
// operands.
15#define DMDST_FUNCT7 0b0000001
// DMCPYI: encoded with a literal in the rs2 field (0b00000 and 0b00010 in the
// visible uses) -- presumably the immediate-configuration form of DMCPY;
// TODO confirm against the DMA ISA documentation.
16#define DMCPYI_FUNCT7 0b0000010
// DMCPY: issued with the transfer size and a `cfg` register (derived from the
// channel number) as operands.
17#define DMCPY_FUNCT7 0b0000011
// DMSTATI: used inside the polling loops and the tracking start/stop
// sequences, with a literal in the rs2 field.
18#define DMSTATI_FUNCT7 0b0000100
// DMSTAT: used inside the per-channel polling loops next to a `cfg` register
// -- presumably the register-operand form of DMSTATI; TODO confirm.
19#define DMSTAT_FUNCT7 0b0000101
// DMSTR: issued with the source and destination strides as operands.
20#define DMSTR_FUNCT7 0b0000110
// DMREP: issued in the 2D transfer paths right after DMSTR -- presumably sets
// the repetition count; TODO confirm (its operand list is not visible here).
21#define DMREP_FUNCT7 0b0000111
23#define R_TYPE_ENCODE(funct7, rs2, rs1, funct3, rd, opcode) \
24 ((funct7 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) | \
39 register uint32_t reg_dst_low
asm(
"a0") = dst >> 0;
40 register uint32_t reg_dst_high
asm(
"a1") = dst >> 32;
41 register uint32_t reg_src_low
asm(
"a2") = src >> 0;
42 register uint32_t reg_src_high
asm(
"a3") = src >> 32;
43 register uint32_t reg_size
asm(
"a4") = size;
46 asm volatile(
".word %0\n" ::
"i"(R_TYPE_ENCODE(DMSRC_FUNCT7, 13, 12,
47 XDMA_FUNCT3, 0, OP_CUSTOM1)),
48 "r"(reg_src_high),
"r"(reg_src_low));
51 asm volatile(
".word %0\n" ::
"i"(R_TYPE_ENCODE(DMDST_FUNCT7, 11, 10,
52 XDMA_FUNCT3, 0, OP_CUSTOM1)),
53 "r"(reg_dst_high),
"r"(reg_dst_low));
56 register uint32_t reg_txid
asm(
"a0");
57 asm volatile(
".word %1\n"
59 :
"i"(R_TYPE_ENCODE(DMCPYI_FUNCT7, 0b00000, 14, XDMA_FUNCT3,
91 size_t size,
size_t dst_stride,
94 register uint32_t reg_dst_low
asm(
"a0") = dst >> 0;
95 register uint32_t reg_dst_high
asm(
"a1") = dst >> 32;
96 register uint32_t reg_src_low
asm(
"a2") = src >> 0;
97 register uint32_t reg_src_high
asm(
"a3") = src >> 32;
98 register uint32_t reg_size
asm(
"a4") = size;
99 register uint32_t reg_dst_stride
asm(
"a5") = dst_stride;
100 register uint32_t reg_src_stride
asm(
"a6") = src_stride;
101 register uint32_t reg_repeat
asm(
"a7") = repeat;
104 asm volatile(
".word %0\n" ::
"i"(R_TYPE_ENCODE(DMSRC_FUNCT7, 13, 12,
105 XDMA_FUNCT3, 0, OP_CUSTOM1)),
106 "r"(reg_src_high),
"r"(reg_src_low));
109 asm volatile(
".word %0\n" ::
"i"(R_TYPE_ENCODE(DMDST_FUNCT7, 11, 10,
110 XDMA_FUNCT3, 0, OP_CUSTOM1)),
111 "r"(reg_dst_high),
"r"(reg_dst_low));
114 asm volatile(
".word %0\n" ::
"i"(R_TYPE_ENCODE(DMSTR_FUNCT7, 15, 16,
115 XDMA_FUNCT3, 0, OP_CUSTOM1)),
116 "r"(reg_src_stride),
"r"(reg_dst_stride));
119 asm volatile(
".word %0\n" ::
"i"(R_TYPE_ENCODE(DMREP_FUNCT7, 0, 17,
120 XDMA_FUNCT3, 0, OP_CUSTOM1)),
124 register uint32_t reg_txid
asm(
"a0");
125 asm volatile(
".word %1\n"
127 :
"i"(R_TYPE_ENCODE(DMCPYI_FUNCT7, 0b00010, 14, XDMA_FUNCT3,
147 size_t size,
size_t dst_stride,
148 size_t src_stride,
size_t repeat) {
166 register uint32_t reg_dst_low
asm(
"a0") = dst >> 0;
167 register uint32_t reg_dst_high
asm(
"a1") = dst >> 32;
168 register uint32_t reg_src_low
asm(
"a2") = src >> 0;
169 register uint32_t reg_src_high
asm(
"a3") = src >> 32;
170 register uint32_t reg_size
asm(
"a4") = size;
171 register uint32_t cfg
asm(
"a5") = channel << 2;
174 asm volatile(
".word %0\n" ::
"i"(R_TYPE_ENCODE(DMSRC_FUNCT7, 13, 12,
175 XDMA_FUNCT3, 0, OP_CUSTOM1)),
176 "r"(reg_src_high),
"r"(reg_src_low));
179 asm volatile(
".word %0\n" ::
"i"(R_TYPE_ENCODE(DMDST_FUNCT7, 11, 10,
180 XDMA_FUNCT3, 0, OP_CUSTOM1)),
181 "r"(reg_dst_high),
"r"(reg_dst_low));
184 register uint32_t reg_txid
asm(
"a0");
188 :
"i"(R_TYPE_ENCODE(DMCPY_FUNCT7, 15, 14, XDMA_FUNCT3, 10, OP_CUSTOM1)),
189 "r"(reg_size),
"r"(cfg));
225 uint64_t dst, uint64_t src,
size_t size,
size_t dst_stride,
226 size_t src_stride,
size_t repeat, uint32_t channel) {
227 register uint32_t reg_dst_low
asm(
"a0") = dst >> 0;
228 register uint32_t reg_dst_high
asm(
"a1") = dst >> 32;
229 register uint32_t reg_src_low
asm(
"a2") = src >> 0;
230 register uint32_t reg_src_high
asm(
"a3") = src >> 32;
231 register uint32_t reg_size
asm(
"a4") = size;
232 register uint32_t reg_dst_stride
asm(
"a5") = dst_stride;
233 register uint32_t reg_src_stride
asm(
"a6") = src_stride;
234 register uint32_t reg_repeat
asm(
"a7") = repeat;
235 register uint32_t cfg
asm(
"t2") = channel << 2 | 2;
238 asm volatile(
".word %0\n" ::
"i"(R_TYPE_ENCODE(DMSRC_FUNCT7, 13, 12,
239 XDMA_FUNCT3, 0, OP_CUSTOM1)),
240 "r"(reg_src_high),
"r"(reg_src_low));
243 asm volatile(
".word %0\n" ::
"i"(R_TYPE_ENCODE(DMDST_FUNCT7, 11, 10,
244 XDMA_FUNCT3, 0, OP_CUSTOM1)),
245 "r"(reg_dst_high),
"r"(reg_dst_low));
248 asm volatile(
".word %0\n" ::
"i"(R_TYPE_ENCODE(DMSTR_FUNCT7, 15, 16,
249 XDMA_FUNCT3, 0, OP_CUSTOM1)),
250 "r"(reg_src_stride),
"r"(reg_dst_stride));
253 asm volatile(
".word %0\n" ::
"i"(R_TYPE_ENCODE(DMREP_FUNCT7, 0, 17,
254 XDMA_FUNCT3, 0, OP_CUSTOM1)),
258 register uint32_t reg_txid
asm(
"a0");
262 :
"i"(R_TYPE_ENCODE(DMCPY_FUNCT7, 7, 14, XDMA_FUNCT3, 10, OP_CUSTOM1)),
263 "r"(cfg),
"r"(reg_size));
283 size_t size,
size_t dst_stride,
288 dst_stride, src_stride, repeat,
301 "bltu t0, %1, 1b \n" ::
"i"(
302 R_TYPE_ENCODE(DMSTATI_FUNCT7, 0b00, 0, XDMA_FUNCT3, 5, OP_CUSTOM1)),
313 register uint32_t cfg
asm(
"t1") = channel << 2;
318 "blez t0, 1b \n" ::
"i"(
319 R_TYPE_ENCODE(DMSTAT_FUNCT7, 6, 0, XDMA_FUNCT3, 5, OP_CUSTOM1)),
332 "bne t0, zero, 1b \n" ::
"i"(
333 R_TYPE_ENCODE(DMSTATI_FUNCT7, 0b10, 0, XDMA_FUNCT3, 5, OP_CUSTOM1))
342 register uint32_t tmp;
344 register uint32_t cfg
asm(
"t1") = channel << 2 | 2;
348 "bne t0, zero, 1b \n" ::
"i"(
349 R_TYPE_ENCODE(DMSTAT_FUNCT7, 6, 0, XDMA_FUNCT3, 5, OP_CUSTOM1)),
359 register uint32_t tmp;
361 for (
int c = 0; c < num_channels; c++) {
374 asm volatile(
".word %0\n" ::
"i"(
375 R_TYPE_ENCODE(DMSTATI_FUNCT7, 0b00, 0, XDMA_FUNCT3, 0, OP_CUSTOM1)));
385 asm volatile(
".word %0\n" ::
"i"(
386 R_TYPE_ENCODE(DMSTATI_FUNCT7, 0b00, 0, XDMA_FUNCT3, 3, OP_CUSTOM1)));
399 uint32_t nbytes = 64;
419 size_t tile_idx,
size_t tile_size,
421 size_t tile_nbytes = tile_size * prec;
434 size_t tile_idx,
size_t tile_size,
436 size_t tile_nbytes = tile_size * prec;
455 void *dst,
void *src,
size_t tile_x1_idx,
size_t tile_x0_idx,
456 size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
458 size_t src_offset = 0;
460 src_offset += tile_x0_idx * tile_x0_size;
461 src_offset += tile_x1_idx * tile_x1_size * full_x0_size;
488 void *dst,
void *src,
size_t tile_x1_idx,
size_t tile_x0_idx,
489 size_t tile_x1_size,
size_t tile_x0_size,
size_t full_x0_size,
491 size_t dst_offset = 0;
493 dst_offset += tile_x0_idx * tile_x0_size;
494 dst_offset += tile_x1_idx * tile_x1_size * full_x0_size;
void snrt_dma_wait_all()
Block until all DMA operations cease.
Definition dma.h:327
snrt_dma_txid_t snrt_dma_start_1d_channel(void *dst, const void *src, size_t size, uint32_t channel)
Start an asynchronous 1D DMA transfer with native-size pointers on a specific channel.
Definition dma.h:203
snrt_dma_txid_t snrt_dma_store_2d_tile(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec)
Store a 2D tile to a 2D array.
Definition dma.h:487
uint32_t snrt_dma_start_1d_wideptr(uint64_t dst, uint64_t src, size_t size)
Start an asynchronous 1D DMA transfer with 64-bit wide pointers.
Definition dma.h:37
snrt_dma_txid_t snrt_dma_load_1d_tile(void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec)
Load a tile of a 1D array.
Definition dma.h:418
snrt_dma_txid_t snrt_dma_start_2d(void *dst, const void *src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat)
Start an asynchronous 2D DMA transfer with native-size pointers.
Definition dma.h:146
uint32_t snrt_dma_txid_t
A DMA transfer identifier.
Definition dma.h:28
snrt_dma_txid_t snrt_dma_start_1d_channel_wideptr(uint64_t dst, uint64_t src, size_t size, uint32_t channel)
Start an asynchronous 1D DMA transfer with 64-bit wide pointers on a specific channel.
Definition dma.h:162
void snrt_dma_wait(snrt_dma_txid_t tid)
Block until a DMA transfer finishes.
Definition dma.h:296
snrt_dma_txid_t snrt_dma_store_1d_tile(void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec)
Store a tile to a 1D array.
Definition dma.h:433
snrt_dma_txid_t snrt_dma_load_2d_tile(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec)
Load a 2D tile of a 2D array.
Definition dma.h:454
void snrt_dma_wait_all_channels(uint32_t num_channels)
Block until the first num_channels channels are idle.
Definition dma.h:358
void snrt_dma_wait_all_channel(uint32_t channel)
Block until a specific DMA channel is idle.
Definition dma.h:341
snrt_dma_txid_t snrt_dma_start_2d_channel(void *dst, const void *src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat, uint32_t channel)
Start an asynchronous 2D DMA transfer with native-size pointers on a specific channel.
Definition dma.h:282
void snrt_dma_memset(void *ptr, uint8_t value, uint32_t len)
Fast memset function performed by DMA.
Definition dma.h:395
snrt_dma_txid_t snrt_dma_start_1d(void *dst, const void *src, size_t size)
Start an asynchronous 1D DMA transfer with native-size pointers.
Definition dma.h:73
void snrt_dma_wait_channel(snrt_dma_txid_t tid, uint32_t channel)
Block until a DMA transfer finishes on a specific channel.
Definition dma.h:311
void snrt_dma_start_tracking()
Start tracking of a DMA performance region. Does not have any implications on the HW...
Definition dma.h:372
snrt_dma_txid_t snrt_dma_start_2d_wideptr(uint64_t dst, uint64_t src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat)
Start an asynchronous 2D DMA transfer with 64-bit wide pointers.
Definition dma.h:90
snrt_dma_txid_t snrt_dma_start_2d_channel_wideptr(uint64_t dst, uint64_t src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat, uint32_t channel)
Start an asynchronous 2D DMA transfer with 64-bit wide pointers on a specific channel.
Definition dma.h:224
void snrt_dma_stop_tracking()
Stop tracking of a DMA performance region. Does not have any implications on the HW...
Definition dma.h:384