Snitch Runtime
Loading...
Searching...
No Matches
dma.h
Go to the documentation of this file.
1// Copyright 2023 ETH Zurich and University of Bologna.
2// Licensed under the Apache License, Version 2.0, see LICENSE for details.
3// SPDX-License-Identifier: Apache-2.0
4
10#pragma once
11
// Encoding fields of the DMA custom instructions that this file emits as raw
// `.word`s through R_TYPE_ENCODE: XDMA_FUNCT3 is passed as the funct3 field of
// every instruction, and each *_FUNCT7 value selects one operation.
12#define XDMA_FUNCT3 0b000
13#define DMSRC_FUNCT7 0b0000000 // dmsrc: set the source address
14#define DMDST_FUNCT7 0b0000001 // dmdst: set the destination address
15#define DMCPYI_FUNCT7 0b0000010 // dmcpyi: start a copy, immediate config
16#define DMCPY_FUNCT7 0b0000011 // dmcpy: start a copy, config in a register
17#define DMSTATI_FUNCT7 0b0000100 // dmstati: read status, immediate selector
18#define DMSTAT_FUNCT7 0b0000101 // dmstat: read status, selector in a register
19#define DMSTR_FUNCT7 0b0000110 // dmstr: set the source/destination strides
20#define DMREP_FUNCT7 0b0000111 // dmrep: set the repetition count
21
// A DMA transfer identifier, as returned by the snrt_dma_start_* functions and
// consumed by snrt_dma_wait() / snrt_dma_wait_channel().
23typedef uint32_t snrt_dma_txid_t;
24
32inline uint32_t snrt_dma_start_1d_wideptr(uint64_t dst, uint64_t src,
33 size_t size) {
34 register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10
35 register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11
36 register uint32_t reg_src_low asm("a2") = src >> 0; // 12
37 register uint32_t reg_src_high asm("a3") = src >> 32; // 13
38 register uint32_t reg_size asm("a4") = size; // 14
39
40 // dmsrc a2, a3
41 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMSRC_FUNCT7, 13, 12,
42 XDMA_FUNCT3, 0, OP_CUSTOM1)),
43 "r"(reg_src_high), "r"(reg_src_low));
44
45 // dmdst a0, a1
46 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMDST_FUNCT7, 11, 10,
47 XDMA_FUNCT3, 0, OP_CUSTOM1)),
48 "r"(reg_dst_high), "r"(reg_dst_low));
49
50 // dmcpyi a0, a4, 0b00
51 register uint32_t reg_txid asm("a0"); // 10
52 asm volatile(".word %1\n"
53 : "=r"(reg_txid)
54 : "i"(R_TYPE_ENCODE(DMCPYI_FUNCT7, 0b00000, 14, XDMA_FUNCT3,
55 10, OP_CUSTOM1)),
56 "r"(reg_size));
57
58 return reg_txid;
59}
60
68inline snrt_dma_txid_t snrt_dma_start_1d(void *dst, const void *src,
69 size_t size) {
70 return snrt_dma_start_1d_wideptr((size_t)dst, (size_t)src, size);
71}
72
85inline snrt_dma_txid_t snrt_dma_start_2d_wideptr(uint64_t dst, uint64_t src,
86 size_t size, size_t dst_stride,
87 size_t src_stride,
88 size_t repeat) {
89 register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10
90 register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11
91 register uint32_t reg_src_low asm("a2") = src >> 0; // 12
92 register uint32_t reg_src_high asm("a3") = src >> 32; // 13
93 register uint32_t reg_size asm("a4") = size; // 14
94 register uint32_t reg_dst_stride asm("a5") = dst_stride; // 15
95 register uint32_t reg_src_stride asm("a6") = src_stride; // 16
96 register uint32_t reg_repeat asm("a7") = repeat; // 17
97
98 // dmsrc a0, a1
99 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMSRC_FUNCT7, 13, 12,
100 XDMA_FUNCT3, 0, OP_CUSTOM1)),
101 "r"(reg_src_high), "r"(reg_src_low));
102
103 // dmdst a0, a1
104 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMDST_FUNCT7, 11, 10,
105 XDMA_FUNCT3, 0, OP_CUSTOM1)),
106 "r"(reg_dst_high), "r"(reg_dst_low));
107
108 // dmstr a5, a6
109 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMSTR_FUNCT7, 15, 16,
110 XDMA_FUNCT3, 0, OP_CUSTOM1)),
111 "r"(reg_src_stride), "r"(reg_dst_stride));
112
113 // dmrep a7
114 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMREP_FUNCT7, 0, 17,
115 XDMA_FUNCT3, 0, OP_CUSTOM1)),
116 "r"(reg_repeat));
117
118 // dmcpyi a0, a4, 0b10
119 register uint32_t reg_txid asm("a0"); // 10
120 asm volatile(".word %1\n"
121 : "=r"(reg_txid)
122 : "i"(R_TYPE_ENCODE(DMCPYI_FUNCT7, 0b00010, 14, XDMA_FUNCT3,
123 10, OP_CUSTOM1)),
124 "r"(reg_size));
125
126 return reg_txid;
127}
128
141inline snrt_dma_txid_t snrt_dma_start_2d(void *dst, const void *src,
142 size_t size, size_t dst_stride,
143 size_t src_stride, size_t repeat) {
144 return snrt_dma_start_2d_wideptr((size_t)dst, (size_t)src, size, dst_stride,
145 src_stride, repeat);
146}
147
158 uint64_t src,
159 size_t size,
160 uint32_t channel) {
161 register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10
162 register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11
163 register uint32_t reg_src_low asm("a2") = src >> 0; // 12
164 register uint32_t reg_src_high asm("a3") = src >> 32; // 13
165 register uint32_t reg_size asm("a4") = size; // 14
166 register uint32_t cfg asm("a5") = channel << 2; // 15
167
168 // dmsrc a2, a3
169 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMSRC_FUNCT7, 13, 12,
170 XDMA_FUNCT3, 0, OP_CUSTOM1)),
171 "r"(reg_src_high), "r"(reg_src_low));
172
173 // dmdst a0, a1
174 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMDST_FUNCT7, 11, 10,
175 XDMA_FUNCT3, 0, OP_CUSTOM1)),
176 "r"(reg_dst_high), "r"(reg_dst_low));
177
178 // dmcpy a0, a4, a5
179 register uint32_t reg_txid asm("a0"); // 10
180 asm volatile(
181 ".word %1\n"
182 : "=r"(reg_txid)
183 : "i"(R_TYPE_ENCODE(DMCPY_FUNCT7, 15, 14, XDMA_FUNCT3, 10, OP_CUSTOM1)),
184 "r"(reg_size), "r"(cfg));
185
186 return reg_txid;
187}
188
198inline snrt_dma_txid_t snrt_dma_start_1d_channel(void *dst, const void *src,
199 size_t size,
200 uint32_t channel) {
201 return snrt_dma_start_1d_channel_wideptr((size_t)dst, (size_t)src, size,
202 channel);
203}
204
220 uint64_t dst, uint64_t src, size_t size, size_t dst_stride,
221 size_t src_stride, size_t repeat, uint32_t channel) {
222 register uint32_t reg_dst_low asm("a0") = dst >> 0; // 10
223 register uint32_t reg_dst_high asm("a1") = dst >> 32; // 11
224 register uint32_t reg_src_low asm("a2") = src >> 0; // 12
225 register uint32_t reg_src_high asm("a3") = src >> 32; // 13
226 register uint32_t reg_size asm("a4") = size; // 14
227 register uint32_t reg_dst_stride asm("a5") = dst_stride; // 15
228 register uint32_t reg_src_stride asm("a6") = src_stride; // 16
229 register uint32_t reg_repeat asm("a7") = repeat; // 17
230 register uint32_t cfg asm("t2") = channel << 2 | 2; // 7
231
232 // dmsrc a0, a1
233 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMSRC_FUNCT7, 13, 12,
234 XDMA_FUNCT3, 0, OP_CUSTOM1)),
235 "r"(reg_src_high), "r"(reg_src_low));
236
237 // dmdst a0, a1
238 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMDST_FUNCT7, 11, 10,
239 XDMA_FUNCT3, 0, OP_CUSTOM1)),
240 "r"(reg_dst_high), "r"(reg_dst_low));
241
242 // dmstr a5, a6
243 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMSTR_FUNCT7, 15, 16,
244 XDMA_FUNCT3, 0, OP_CUSTOM1)),
245 "r"(reg_src_stride), "r"(reg_dst_stride));
246
247 // dmrep a7
248 asm volatile(".word %0\n" ::"i"(R_TYPE_ENCODE(DMREP_FUNCT7, 0, 17,
249 XDMA_FUNCT3, 0, OP_CUSTOM1)),
250 "r"(reg_repeat));
251
252 // dmcpy a0, a4, t2
253 register uint32_t reg_txid asm("a0"); // 10
254 asm volatile(
255 ".word %1\n"
256 : "=r"(reg_txid)
257 : "i"(R_TYPE_ENCODE(DMCPY_FUNCT7, 7, 14, XDMA_FUNCT3, 10, OP_CUSTOM1)),
258 "r"(cfg), "r"(reg_size));
259
260 return reg_txid;
261}
262
277inline snrt_dma_txid_t snrt_dma_start_2d_channel(void *dst, const void *src,
278 size_t size, size_t dst_stride,
279 size_t src_stride,
280 size_t repeat,
281 uint32_t channel) {
282 return snrt_dma_start_2d_channel_wideptr((size_t)dst, (size_t)src, size,
283 dst_stride, src_stride, repeat,
284 channel);
285}
286
292 // dmstati t0, 0 # 0=status.completed_id
293 asm volatile(
294 "1: \n"
295 ".word %0\n"
296 "bltu t0, %1, 1b \n" ::"i"(
297 R_TYPE_ENCODE(DMSTATI_FUNCT7, 0b00, 0, XDMA_FUNCT3, 5, OP_CUSTOM1)),
298 "r"(tid)
299 : "t0");
300}
301
306inline void snrt_dma_wait_channel(snrt_dma_txid_t tid, uint32_t channel) {
307 // dmstati t0, 0 # 0=status.completed_id
308 register uint32_t cfg asm("t1") = channel << 2;
309 asm volatile(
310 "1: \n"
311 ".word %0\n"
312 "sub t0, t0, %1 \n"
313 "blez t0, 1b \n" ::"i"(
314 R_TYPE_ENCODE(DMSTAT_FUNCT7, 6, 0, XDMA_FUNCT3, 5, OP_CUSTOM1)),
315 "r"(tid), "r"(cfg)
316 : "t0");
317}
318
322inline void snrt_dma_wait_all() {
323 // dmstati t0, 2 # 2=status.busy
324 asm volatile(
325 "1: \n"
326 ".word %0\n"
327 "bne t0, zero, 1b \n" ::"i"(
328 R_TYPE_ENCODE(DMSTATI_FUNCT7, 0b10, 0, XDMA_FUNCT3, 5, OP_CUSTOM1))
329 : "t0");
330}
331
336inline void snrt_dma_wait_all_channel(uint32_t channel) {
337 register uint32_t tmp;
338 // dmstati t0, 2 # 2=status.busy
339 register uint32_t cfg asm("t1") = channel << 2 | 2;
340 asm volatile(
341 "1: \n"
342 ".word %0\n"
343 "bne t0, zero, 1b \n" ::"i"(
344 R_TYPE_ENCODE(DMSTAT_FUNCT7, 6, 0, XDMA_FUNCT3, 5, OP_CUSTOM1)),
345 "r"(cfg)
346 : "t0");
347}
348
/**
 * @brief Block until the first @p num_channels channels are idle.
 * @param num_channels The number of channels to wait on, starting from
 *        channel 0.
 */
inline void snrt_dma_wait_all_channels(uint32_t num_channels) {
    // Wait on each channel in turn. The index is unsigned to match
    // num_channels and avoid a signed/unsigned comparison.
    for (uint32_t c = 0; c < num_channels; c++) {
        snrt_dma_wait_all_channel(c);
    }
}
360
368 // dmstati zero, 0
369 asm volatile(".word %0\n" ::"i"(
370 R_TYPE_ENCODE(DMSTATI_FUNCT7, 0b00, 0, XDMA_FUNCT3, 0, OP_CUSTOM1)));
371}
372
380 asm volatile(".word %0\n" ::"i"(
381 R_TYPE_ENCODE(DMSTATI_FUNCT7, 0b00, 0, XDMA_FUNCT3, 3, OP_CUSTOM1)));
382}
383
390inline void snrt_dma_memset(void *ptr, uint8_t value, uint32_t len) {
391 // set first 64bytes to value
392 // memset(ptr, value, 64);
393 uint8_t *p = ptr;
394 uint32_t nbytes = 64;
395 while (nbytes--) {
396 *p++ = value;
397 }
398
399 // DMA copy the the rest
400 snrt_dma_txid_t memset_txid =
401 snrt_dma_start_2d(ptr, ptr, 64, 64, 0, len / 64);
403}
404
413inline snrt_dma_txid_t snrt_dma_load_1d_tile(void *dst, void *src,
414 size_t tile_idx, size_t tile_size,
415 uint32_t prec) {
416 size_t tile_nbytes = tile_size * prec;
417 return snrt_dma_start_1d(dst, src + tile_idx * tile_nbytes, tile_nbytes);
418}
419
428inline snrt_dma_txid_t snrt_dma_store_1d_tile(void *dst, void *src,
429 size_t tile_idx, size_t tile_size,
430 uint32_t prec) {
431 size_t tile_nbytes = tile_size * prec;
432 return snrt_dma_start_1d(dst + tile_idx * tile_nbytes, src, tile_nbytes);
433}
434
450 void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx,
451 size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size,
452 uint32_t prec) {
453 size_t src_offset = 0;
454 // Advance src array in x0 and x1 dimensions, and convert to byte offset
455 src_offset += tile_x0_idx * tile_x0_size;
456 src_offset += tile_x1_idx * tile_x1_size * full_x0_size;
457 src_offset *= prec;
458 // Initiate transfer
459 return snrt_dma_start_2d(dst, // dst
460 src + src_offset, // src
461 tile_x0_size * prec, // size
462 tile_x0_size * prec, // dst_stride
463 full_x0_size * prec, // src_stride
464 tile_x1_size // repeat
465 );
466}
467
483 void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx,
484 size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size,
485 uint32_t prec) {
486 size_t dst_offset = 0;
487 // Advance dst array in x0 and x1 dimensions, and convert to byte offset
488 dst_offset += tile_x0_idx * tile_x0_size;
489 dst_offset += tile_x1_idx * tile_x1_size * full_x0_size;
490 dst_offset *= prec;
491 // Initiate transfer
492 return snrt_dma_start_2d(dst + dst_offset, // dst
493 src, // src
494 tile_x0_size * prec, // size
495 full_x0_size * prec, // dst_stride
496 tile_x0_size * prec, // src_stride
497 tile_x1_size // repeat
498 );
499}
void snrt_dma_wait_all()
Block until all DMA operations cease.
Definition dma.h:322
snrt_dma_txid_t snrt_dma_start_1d_channel(void *dst, const void *src, size_t size, uint32_t channel)
Start an asynchronous 1D DMA transfer with native-size pointers on a specific channel.
Definition dma.h:198
snrt_dma_txid_t snrt_dma_store_2d_tile(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec)
Store a 2D tile to a 2D array.
Definition dma.h:482
uint32_t snrt_dma_start_1d_wideptr(uint64_t dst, uint64_t src, size_t size)
Start an asynchronous 1D DMA transfer with 64-bit wide pointers.
Definition dma.h:32
snrt_dma_txid_t snrt_dma_load_1d_tile(void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec)
Load a tile of a 1D array.
Definition dma.h:413
snrt_dma_txid_t snrt_dma_start_2d(void *dst, const void *src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat)
Start an asynchronous 2D DMA transfer with native-size pointers.
Definition dma.h:141
uint32_t snrt_dma_txid_t
A DMA transfer identifier.
Definition dma.h:23
snrt_dma_txid_t snrt_dma_start_1d_channel_wideptr(uint64_t dst, uint64_t src, size_t size, uint32_t channel)
Start an asynchronous 1D DMA transfer with 64-bit wide pointers on a specific channel.
Definition dma.h:157
void snrt_dma_wait(snrt_dma_txid_t tid)
Block until a DMA transfer finishes.
Definition dma.h:291
snrt_dma_txid_t snrt_dma_store_1d_tile(void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec)
Store a tile to a 1D array.
Definition dma.h:428
snrt_dma_txid_t snrt_dma_load_2d_tile(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec)
Load a 2D tile of a 2D array.
Definition dma.h:449
void snrt_dma_wait_all_channels(uint32_t num_channels)
Block until the first num_channels channels are idle.
Definition dma.h:353
void snrt_dma_wait_all_channel(uint32_t channel)
Block until a specific DMA channel is idle.
Definition dma.h:336
snrt_dma_txid_t snrt_dma_start_2d_channel(void *dst, const void *src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat, uint32_t channel)
Start an asynchronous 2D DMA transfer with native-size pointers on a specific channel.
Definition dma.h:277
void snrt_dma_memset(void *ptr, uint8_t value, uint32_t len)
Fast memset function performed by DMA.
Definition dma.h:390
snrt_dma_txid_t snrt_dma_start_1d(void *dst, const void *src, size_t size)
Start an asynchronous 1D DMA transfer with native-size pointers.
Definition dma.h:68
void snrt_dma_wait_channel(snrt_dma_txid_t tid, uint32_t channel)
Block until a DMA transfer finishes on a specific channel.
Definition dma.h:306
void snrt_dma_start_tracking()
Start tracking of a DMA performance region. Does not have any implications on the HW....
Definition dma.h:367
snrt_dma_txid_t snrt_dma_start_2d_wideptr(uint64_t dst, uint64_t src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat)
Start an asynchronous 2D DMA transfer with 64-bit wide pointers.
Definition dma.h:85
snrt_dma_txid_t snrt_dma_start_2d_channel_wideptr(uint64_t dst, uint64_t src, size_t size, size_t dst_stride, size_t src_stride, size_t repeat, uint32_t channel)
Start an asynchronous 2D DMA transfer with 64-bit wide pointers on a specific channel.
Definition dma.h:219
void snrt_dma_stop_tracking()
Stop tracking of a DMA performance region. Does not have any implications on the HW....
Definition dma.h:379