Snitch Runtime
Loading...
Searching...
No Matches
dma.h
Go to the documentation of this file.
1// Copyright 2023 ETH Zurich and University of Bologna.
2// Licensed under the Apache License, Version 2.0, see LICENSE for details.
3// SPDX-License-Identifier: Apache-2.0
4
10#pragma once
11
// Major opcode (RISC-V custom-1 slot) and funct3 shared by every DMA
// instruction word emitted in this file.
#define OP_CUSTOM1 0b0101011
#define XDMA_FUNCT3 0b000
// funct7 selectors, one per DMA instruction.
#define DMSRC_FUNCT7 0b0000000
#define DMDST_FUNCT7 0b0000001
#define DMCPYI_FUNCT7 0b0000010
#define DMCPY_FUNCT7 0b0000011
#define DMSTATI_FUNCT7 0b0000100
#define DMSTAT_FUNCT7 0b0000101
#define DMSTR_FUNCT7 0b0000110
#define DMREP_FUNCT7 0b0000111

// Assemble a 32-bit R-type instruction word from its fields. Register fields
// take the register *number* (e.g. 10 for a0). Every argument is
// parenthesized so that compound expressions (e.g. `a | b`) are shifted as a
// whole instead of being split by operator precedence.
#define R_TYPE_ENCODE(funct7, rs2, rs1, funct3, rd, opcode)                \
    (((funct7) << 25) | ((rs2) << 20) | ((rs1) << 15) | ((funct3) << 12) | \
     ((rd) << 7) | (opcode))

/// A DMA transfer identifier.
typedef uint32_t snrt_dma_txid_t;
29
37inline uint32_t snrt_dma_start_1d_wideptr(uint64_t dst, uint64_t src,
38 size_t size) {
39 register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10
40 register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11
41 register uint32_t reg_src_low asm("a2") = src >> 0; // 12
42 register uint32_t reg_src_high asm("a3") = src >> 32; // 13
43 register uint32_t reg_size asm("a4") = size; // 14
44
45 // dmsrc a2, a3
46 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMSRC_FUNCT7, 13, 12,
47 XDMA_FUNCT3, 0, OP_CUSTOM1)),
48 "r"(reg_src_high), "r"(reg_src_low));
49
50 // dmdst a0, a1
51 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMDST_FUNCT7, 11, 10,
52 XDMA_FUNCT3, 0, OP_CUSTOM1)),
53 "r"(reg_dst_high), "r"(reg_dst_low));
54
55 // dmcpyi a0, a4, 0b00
56 register uint32_t reg_txid asm("a0"); // 10
57 asm volatile(".word %1\n"
58 : "=r"(reg_txid)
59 : "i"(R_TYPE_ENCODE(DMCPYI_FUNCT7, 0b00000, 14, XDMA_FUNCT3,
60 10, OP_CUSTOM1)),
61 "r"(reg_size));
62
63 return reg_txid;
64}
65
73inline snrt_dma_txid_t snrt_dma_start_1d(void *dst, const void *src,
74 size_t size) {
75 return snrt_dma_start_1d_wideptr((size_t)dst, (size_t)src, size);
76}
77
90inline snrt_dma_txid_t snrt_dma_start_2d_wideptr(uint64_t dst, uint64_t src,
91 size_t size, size_t dst_stride,
92 size_t src_stride,
93 size_t repeat) {
94 register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10
95 register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11
96 register uint32_t reg_src_low asm("a2") = src >> 0; // 12
97 register uint32_t reg_src_high asm("a3") = src >> 32; // 13
98 register uint32_t reg_size asm("a4") = size; // 14
99 register uint32_t reg_dst_stride asm("a5") = dst_stride; // 15
100 register uint32_t reg_src_stride asm("a6") = src_stride; // 16
101 register uint32_t reg_repeat asm("a7") = repeat; // 17
102
103 // dmsrc a0, a1
104 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMSRC_FUNCT7, 13, 12,
105 XDMA_FUNCT3, 0, OP_CUSTOM1)),
106 "r"(reg_src_high), "r"(reg_src_low));
107
108 // dmdst a0, a1
109 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMDST_FUNCT7, 11, 10,
110 XDMA_FUNCT3, 0, OP_CUSTOM1)),
111 "r"(reg_dst_high), "r"(reg_dst_low));
112
113 // dmstr a5, a6
114 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMSTR_FUNCT7, 15, 16,
115 XDMA_FUNCT3, 0, OP_CUSTOM1)),
116 "r"(reg_src_stride), "r"(reg_dst_stride));
117
118 // dmrep a7
119 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMREP_FUNCT7, 0, 17,
120 XDMA_FUNCT3, 0, OP_CUSTOM1)),
121 "r"(reg_repeat));
122
123 // dmcpyi a0, a4, 0b10
124 register uint32_t reg_txid asm("a0"); // 10
125 asm volatile(".word %1\n"
126 : "=r"(reg_txid)
127 : "i"(R_TYPE_ENCODE(DMCPYI_FUNCT7, 0b00010, 14, XDMA_FUNCT3,
128 10, OP_CUSTOM1)),
129 "r"(reg_size));
130
131 return reg_txid;
132}
133
146inline snrt_dma_txid_t snrt_dma_start_2d(void *dst, const void *src,
147 size_t size, size_t dst_stride,
148 size_t src_stride, size_t repeat) {
149 return snrt_dma_start_2d_wideptr((size_t)dst, (size_t)src, size, dst_stride,
150 src_stride, repeat);
151}
152
163 uint64_t src,
164 size_t size,
165 uint32_t channel) {
166 register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10
167 register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11
168 register uint32_t reg_src_low asm("a2") = src >> 0; // 12
169 register uint32_t reg_src_high asm("a3") = src >> 32; // 13
170 register uint32_t reg_size asm("a4") = size; // 14
171 register uint32_t cfg asm("a5") = channel << 2; // 15
172
173 // dmsrc a2, a3
174 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMSRC_FUNCT7, 13, 12,
175 XDMA_FUNCT3, 0, OP_CUSTOM1)),
176 "r"(reg_src_high), "r"(reg_src_low));
177
178 // dmdst a0, a1
179 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMDST_FUNCT7, 11, 10,
180 XDMA_FUNCT3, 0, OP_CUSTOM1)),
181 "r"(reg_dst_high), "r"(reg_dst_low));
182
183 // dmcpy a0, a4, a5
184 register uint32_t reg_txid asm("a0"); // 10
185 asm volatile(
186 ".word %1\n"
187 : "=r"(reg_txid)
188 : "i"(R_TYPE_ENCODE(DMCPY_FUNCT7, 15, 14, XDMA_FUNCT3, 10, OP_CUSTOM1)),
189 "r"(reg_size), "r"(cfg));
190
191 return reg_txid;
192}
193
203inline snrt_dma_txid_t snrt_dma_start_1d_channel(void *dst, const void *src,
204 size_t size,
205 uint32_t channel) {
206 return snrt_dma_start_1d_channel_wideptr((size_t)dst, (size_t)src, size,
207 channel);
208}
209
225 uint64_t dst, uint64_t src, size_t size, size_t dst_stride,
226 size_t src_stride, size_t repeat, uint32_t channel) {
227 register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10
228 register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11
229 register uint32_t reg_src_low asm("a2") = src >> 0; // 12
230 register uint32_t reg_src_high asm("a3") = src >> 32; // 13
231 register uint32_t reg_size asm("a4") = size; // 14
232 register uint32_t reg_dst_stride asm("a5") = dst_stride; // 15
233 register uint32_t reg_src_stride asm("a6") = src_stride; // 16
234 register uint32_t reg_repeat asm("a7") = repeat; // 17
235 register uint32_t cfg asm("t2") = channel << 2 | 2; // 7
236
237 // dmsrc a0, a1
238 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMSRC_FUNCT7, 13, 12,
239 XDMA_FUNCT3, 0, OP_CUSTOM1)),
240 "r"(reg_src_high), "r"(reg_src_low));
241
242 // dmdst a0, a1
243 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMDST_FUNCT7, 11, 10,
244 XDMA_FUNCT3, 0, OP_CUSTOM1)),
245 "r"(reg_dst_high), "r"(reg_dst_low));
246
247 // dmstr a5, a6
248 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMSTR_FUNCT7, 15, 16,
249 XDMA_FUNCT3, 0, OP_CUSTOM1)),
250 "r"(reg_src_stride), "r"(reg_dst_stride));
251
252 // dmrep a7
253 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMREP_FUNCT7, 0, 17,
254 XDMA_FUNCT3, 0, OP_CUSTOM1)),
255 "r"(reg_repeat));
256
257 // dmcpy a0, a4, t2
258 register uint32_t reg_txid asm("a0"); // 10
259 asm volatile(
260 ".word %1\n"
261 : "=r"(reg_txid)
262 : "i"(R_TYPE_ENCODE(DMCPY_FUNCT7, 7, 14, XDMA_FUNCT3, 10, OP_CUSTOM1)),
263 "r"(cfg), "r"(reg_size));
264
265 return reg_txid;
266}
267
282inline snrt_dma_txid_t snrt_dma_start_2d_channel(void *dst, const void *src,
283 size_t size, size_t dst_stride,
284 size_t src_stride,
285 size_t repeat,
286 uint32_t channel) {
287 return snrt_dma_start_2d_channel_wideptr((size_t)dst, (size_t)src, size,
288 dst_stride, src_stride, repeat,
289 channel);
290}
291
297 // dmstati t0, 0 # 0=status.completed_id
298 asm volatile(
299 "1: \n"
300 ".word %0\n"
301 "bltu t0, %1, 1b \n" ::"i"(
302 R_TYPE_ENCODE(DMSTATI_FUNCT7, 0b00, 0, XDMA_FUNCT3, 5, OP_CUSTOM1)),
303 "r"(tid)
304 : "t0");
305}
306
311inline void snrt_dma_wait_channel(snrt_dma_txid_t tid, uint32_t channel) {
312 // dmstati t0, 0 # 0=status.completed_id
313 register uint32_t cfg asm("t1") = channel << 2;
314 asm volatile(
315 "1: \n"
316 ".word %0\n"
317 "sub t0, t0, %1 \n"
318 "blez t0, 1b \n" ::"i"(
319 R_TYPE_ENCODE(DMSTAT_FUNCT7, 6, 0, XDMA_FUNCT3, 5, OP_CUSTOM1)),
320 "r"(tid), "r"(cfg)
321 : "t0");
322}
323
327inline void snrt_dma_wait_all() {
328 // dmstati t0, 2 # 2=status.busy
329 asm volatile(
330 "1: \n"
331 ".word %0\n"
332 "bne t0, zero, 1b \n" ::"i"(
333 R_TYPE_ENCODE(DMSTATI_FUNCT7, 0b10, 0, XDMA_FUNCT3, 5, OP_CUSTOM1))
334 : "t0");
335}
336
341inline void snrt_dma_wait_all_channel(uint32_t channel) {
342 register uint32_t tmp;
343 // dmstati t0, 2 # 2=status.busy
344 register uint32_t cfg asm("t1") = channel << 2 | 2;
345 asm volatile(
346 "1: \n"
347 ".word %0\n"
348 "bne t0, zero, 1b \n" ::"i"(
349 R_TYPE_ENCODE(DMSTAT_FUNCT7, 6, 0, XDMA_FUNCT3, 5, OP_CUSTOM1)),
350 "r"(cfg)
351 : "t0");
352}
353
/**
 * @brief Block until the first @p num_channels channels are idle.
 * @param num_channels Number of channels to wait on, starting at channel 0.
 */
inline void snrt_dma_wait_all_channels(uint32_t num_channels) {
    // Wait on each channel in turn; returns once the last one reports idle.
    for (uint32_t c = 0; c < num_channels; c++) {
        snrt_dma_wait_all_channel(c);
    }
}
365
373 // dmstati zero, 0
374 asm volatile(".word %0\n" ::"i"(
375 R_TYPE_ENCODE(DMSTATI_FUNCT7, 0b00, 0, XDMA_FUNCT3, 0, OP_CUSTOM1)));
376}
377
385 asm volatile(".word %0\n" ::"i"(
386 R_TYPE_ENCODE(DMSTATI_FUNCT7, 0b00, 0, XDMA_FUNCT3, 3, OP_CUSTOM1)));
387}
388
/**
 * @brief Fast memset function performed by DMA.
 * @param ptr   Start of the region to fill.
 * @param value Byte value to write.
 * @param len   Number of bytes to fill.
 *
 * The first 64-byte chunk is written by the core and then replicated over
 * the region by a 2D DMA transfer. Bytes beyond the last full chunk are
 * written by the core. The function returns after the DMA is idle.
 */
inline void snrt_dma_memset(void *ptr, uint8_t value, uint32_t len) {
    const uint32_t chunk = 64;
    uint8_t *bytes = ptr;

    // Seed the first chunk by hand, but never write past len.
    uint32_t head = len < chunk ? len : chunk;
    for (uint32_t i = 0; i < head; i++) {
        bytes[i] = value;
    }

    // Nothing for the DMA to replicate if there is no full chunk.
    uint32_t rows = len / chunk;
    if (rows == 0) {
        return;
    }

    // DMA copy the rest: source stride 0 re-reads the seeded chunk for every
    // row, destination stride 64 advances through the region.
    snrt_dma_start_2d(ptr, ptr, chunk, chunk, 0, rows);
    snrt_dma_wait_all();

    // Fill the trailing bytes that do not make up a full chunk.
    for (uint32_t i = rows * chunk; i < len; i++) {
        bytes[i] = value;
    }
}
409
418inline snrt_dma_txid_t snrt_dma_load_1d_tile(void *dst, void *src,
419 size_t tile_idx, size_t tile_size,
420 uint32_t prec) {
421 size_t tile_nbytes = tile_size * prec;
422 return snrt_dma_start_1d(dst, src + tile_idx * tile_nbytes, tile_nbytes);
423}
424
433inline snrt_dma_txid_t snrt_dma_store_1d_tile(void *dst, void *src,
434 size_t tile_idx, size_t tile_size,
435 uint32_t prec) {
436 size_t tile_nbytes = tile_size * prec;
437 return snrt_dma_start_1d(dst + tile_idx * tile_nbytes, src, tile_nbytes);
438}
439
455 void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx,
456 size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size,
457 uint32_t prec) {
458 size_t src_offset = 0;
459 // Advance src array in x0 and x1 dimensions, and convert to byte offset
460 src_offset += tile_x0_idx * tile_x0_size;
461 src_offset += tile_x1_idx * tile_x1_size * full_x0_size;
462 src_offset *= prec;
463 // Initiate transfer
464 return snrt_dma_start_2d(dst, // dst
465 src + src_offset, // src
466 tile_x0_size * prec, // size
467 tile_x0_size * prec, // dst_stride
468 full_x0_size * prec, // src_stride
469 tile_x1_size // repeat
470 );
471}
472
488 void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx,
489 size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size,
490 uint32_t prec) {
491 size_t dst_offset = 0;
492 // Advance dst array in x0 and x1 dimensions, and convert to byte offset
493 dst_offset += tile_x0_idx * tile_x0_size;
494 dst_offset += tile_x1_idx * tile_x1_size * full_x0_size;
495 dst_offset *= prec;
496 // Initiate transfer
497 return snrt_dma_start_2d(dst + dst_offset, // dst
498 src, // src
499 tile_x0_size * prec, // size
500 full_x0_size * prec, // dst_stride
501 tile_x0_size * prec, // src_stride
502 tile_x1_size // repeat
503 );
504}
void snrt_dma_wait_all()
Block until all DMA operations cease.
Definition dma.h:327
snrt_dma_txid_t snrt_dma_start_1d_channel(void *dst, const void *src, size_t size, uint32_t channel)
Start an asynchronous 1D DMA transfer with native-size pointers on a specific channel.
Definition dma.h:203
snrt_dma_txid_t snrt_dma_store_2d_tile(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec)
Store a 2D tile to a 2D array.
Definition dma.h:487
uint32_t snrt_dma_start_1d_wideptr(uint64_t dst, uint64_t src, size_t size)
Start an asynchronous 1D DMA transfer with 64-bit wide pointers.
Definition dma.h:37
snrt_dma_txid_t snrt_dma_load_1d_tile(void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec)
Load a tile of a 1D array.
Definition dma.h:418
snrt_dma_txid_t snrt_dma_start_2d(void *dst, const void *src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat)
Start an asynchronous 2D DMA transfer with native-size pointers.
Definition dma.h:146
uint32_t snrt_dma_txid_t
A DMA transfer identifier.
Definition dma.h:28
snrt_dma_txid_t snrt_dma_start_1d_channel_wideptr(uint64_t dst, uint64_t src, size_t size, uint32_t channel)
Start an asynchronous 1D DMA transfer with 64-bit wide pointers on a specific channel.
Definition dma.h:162
void snrt_dma_wait(snrt_dma_txid_t tid)
Block until a DMA transfer finishes.
Definition dma.h:296
snrt_dma_txid_t snrt_dma_store_1d_tile(void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec)
Store a tile to a 1D array.
Definition dma.h:433
snrt_dma_txid_t snrt_dma_load_2d_tile(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec)
Load a 2D tile of a 2D array.
Definition dma.h:454
void snrt_dma_wait_all_channels(uint32_t num_channels)
Block until the first num_channels channels are idle.
Definition dma.h:358
void snrt_dma_wait_all_channel(uint32_t channel)
Block until a specific DMA channel is idle.
Definition dma.h:341
snrt_dma_txid_t snrt_dma_start_2d_channel(void *dst, const void *src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat, uint32_t channel)
Start an asynchronous 2D DMA transfer with native-size pointers on a specific channel.
Definition dma.h:282
void snrt_dma_memset(void *ptr, uint8_t value, uint32_t len)
Fast memset function performed by DMA.
Definition dma.h:395
snrt_dma_txid_t snrt_dma_start_1d(void *dst, const void *src, size_t size)
Start an asynchronous 1D DMA transfer with native-size pointers.
Definition dma.h:73
void snrt_dma_wait_channel(snrt_dma_txid_t tid, uint32_t channel)
Block until a DMA transfer finishes on a specific channel.
Definition dma.h:311
void snrt_dma_start_tracking()
Start tracking of a DMA performance region. Does not have any implications on the HW...
Definition dma.h:372
snrt_dma_txid_t snrt_dma_start_2d_wideptr(uint64_t dst, uint64_t src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat)
Start an asynchronous 2D DMA transfer with 64-bit wide pointers.
Definition dma.h:90
snrt_dma_txid_t snrt_dma_start_2d_channel_wideptr(uint64_t dst, uint64_t src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat, uint32_t channel)
Start an asynchronous 2D DMA transfer with 64-bit wide pointers on a specific channel.
Definition dma.h:224
void snrt_dma_stop_tracking()
Stop tracking of a DMA performance region. Does not have any implications on the HW...
Definition dma.h:384