Snitch Runtime
Loading...
Searching...
No Matches
dma.h
Go to the documentation of this file.
1// Copyright 2023 ETH Zurich and University of Bologna.
2// Licensed under the Apache License, Version 2.0, see LICENSE for details.
3// SPDX-License-Identifier: Apache-2.0
4
10#pragma once
11
12#include <math.h>
13
/**
 * @brief A DMA transfer identifier.
 */
typedef uint32_t snrt_dma_txid_t;
16
/**
 * @brief Start a 1D DMA transfer between two 64-bit addresses.
 *
 * Programs the source and destination with `dmsrc`/`dmdst` and launches the
 * copy with `dmcpyi`.
 *
 * @param dst Destination address.
 * @param src Source address.
 * @param size Size operand handed to `dmcpyi`
 *             (NOTE(review): presumably in bytes - confirm).
 * @param channel Channel index, placed in the `dmcpyi` immediate shifted left
 *                by two bits. It is bound with an "i" constraint, so it has
 *                to be a compile-time constant at the point of use.
 * @return The value `dmcpyi` writes to its destination register, used as the
 *         transfer ID.
 */
static inline uint32_t snrt_dma_start_1d(uint64_t dst, uint64_t src,
                                         size_t size,
                                         const uint32_t channel = 0) {
    // The DMA instructions take each 64-bit address as two 32-bit halves.
    const uint32_t src_low = (uint32_t)(src & 0xFFFFFFFF);
    const uint32_t src_high = (uint32_t)(src >> 32);
    const uint32_t dst_low = (uint32_t)(dst & 0xFFFFFFFF);
    const uint32_t dst_high = (uint32_t)(dst >> 32);
    uint32_t id;

    asm volatile(
        "dmsrc %[src_lo], %[src_hi] \n"
        "dmdst %[dst_lo], %[dst_hi] \n"
        "dmcpyi %[txid], %[size], (%[channel] << 2) | 0b00 \n"
        : [ txid ] "=r"(id)
        : [ src_lo ] "r"(src_low), [ src_hi ] "r"(src_high),
          [ dst_lo ] "r"(dst_low), [ dst_hi ] "r"(dst_high),
          [ size ] "r"(size), [ channel ] "i"(channel));

    return id;
}
51
59static inline uint32_t snrt_dma_start_1d(volatile void *dst, volatile void *src,
60 size_t size,
61 const uint32_t channel = 0) {
62 return snrt_dma_start_1d((uint64_t)dst, (uint64_t)src, size, channel);
63}
64
/**
 * @brief Enable multicast for successive transfers.
 *
 * Issues `dmuser` with @p mask as first operand and `zero` as second.
 *
 * @param mask Multicast mask written through `dmuser`.
 */
inline void snrt_dma_enable_mcast(uint32_t mask) {
    asm volatile(
        "dmuser %[mask], zero \n"
        :
        : [ mask ] "r"(mask));
}
72
/**
 * @brief Disable multicast for successive transfers.
 *
 * Issues `dmuser` with both operands set to `zero`.
 */
inline void snrt_dma_disable_mcast() {
    asm volatile("dmuser zero, zero \n");
}
78
86static inline uint32_t snrt_dma_start_1d_mcast(uint64_t dst, uint64_t src,
87 size_t size, uint32_t mask,
88 const uint32_t channel = 0) {
90 uint32_t txid = snrt_dma_start_1d(dst, src, size, channel);
92 return txid;
93}
94
104static inline uint32_t snrt_dma_start_1d_mcast(volatile void *dst,
105 volatile void *src, size_t size,
106 uint32_t mask,
107 const uint32_t channel = 0) {
108 return snrt_dma_start_1d_mcast((uint64_t)dst, (uint64_t)src, size, mask,
109 channel);
110}
111
130static inline snrt_dma_txid_t snrt_dma_start_2d(uint64_t dst, uint64_t src,
131 size_t size, size_t dst_stride,
132 size_t src_stride,
133 size_t repeat,
134 const uint32_t channel = 0) {
135 uint32_t dst_lo = dst & 0xFFFFFFFF;
136 uint32_t dst_hi = dst >> 32;
137 uint32_t src_lo = src & 0xFFFFFFFF;
138 uint32_t src_hi = src >> 32;
139 uint32_t txid;
140
141 asm volatile(
142 "dmsrc %[src_lo], %[src_hi] \n"
143 "dmdst %[dst_lo], %[dst_hi] \n"
144 "dmstr %[src_stride], %[dst_stride] \n"
145 "dmrep %[repeat] \n"
146 "dmcpyi %[txid], %[size], (%[channel] << 2) | 0b10 \n"
147 : [ txid ] "=r"(txid)
148 : [ src_lo ] "r"(src_lo), [ src_hi ] "r"(src_hi),
149 [ dst_lo ] "r"(dst_lo), [ dst_hi ] "r"(dst_hi),
150 [ dst_stride ] "r"(dst_stride), [ src_stride ] "r"(src_stride),
151 [ repeat ] "r"(repeat), [ size ] "r"(size), [ channel ] "i"(channel));
152
153 return txid;
154}
155
164static inline uint32_t snrt_dma_start_2d(volatile void *dst, volatile void *src,
165 size_t size, size_t dst_stride,
166 size_t src_stride, size_t repeat,
167 const uint32_t channel = 0) {
168 return snrt_dma_start_2d((uint64_t)dst, (uint64_t)src, size, dst_stride,
169 src_stride, repeat, channel);
170}
171
182static inline void snrt_dma_wait(snrt_dma_txid_t txid,
183 const uint32_t channel = 0) {
184 asm volatile(
185 "1: \n"
186 "dmstati t0, (%[channel] << 2) | 0 \n"
187 "bltu t0, %[txid], 1b \n"
188 :
189 : [ txid ] "r"(txid), [ channel ] "i"(channel)
190 : "t0");
191}
192
/**
 * @brief Block until status field 2 of the channel reads zero.
 *
 * Repeatedly reads status field 2 of the channel with `dmstati` and loops
 * for as long as the value is non-zero, i.e. while the channel reports busy.
 *
 * @param channel Channel index, placed in the `dmstati` immediate shifted
 *                left by two bits. It is bound with an "i" constraint, so it
 *                has to be a compile-time constant at the point of use.
 */
static inline void snrt_dma_wait_all(const uint32_t channel = 0) {
    uint32_t status;
    asm volatile(
        "1: \n"
        "dmstati %[busy], (%[channel] << 2) | 2 \n"
        "bne %[busy], zero, 1b \n"
        : [ busy ] "=r"(status)
        : [ channel ] "i"(channel));
}
211
216inline void snrt_dma_wait_all_channels(uint32_t num_channels) {
217 for (int c = 0; c < num_channels; c++) {
218 snrt_dma_wait_all(c);
219 }
220}
221
/**
 * @brief Start tracking of a DMA performance region.
 *
 * Issues a `dmstati` whose result is discarded (destination `zero`).
 */
inline void snrt_dma_start_tracking() {
    asm volatile("dmstati zero, 0 \n");
}
229
/**
 * @brief Stop tracking of a DMA performance region.
 *
 * Issues a `dmstati` whose result is discarded (destination `zero`).
 */
inline void snrt_dma_stop_tracking() {
    asm volatile("dmstati zero, 0 \n");
}
237
245inline void snrt_dma_memset(void *ptr, uint8_t value, uint32_t len) {
246 // We set the first 64 bytes to the value, and then we use the DMA to copy
247 // these into the remaining memory region. DMA is used only if len is
248 // larger than 64 bytes, and an integer multiple of 64 bytes.
249 size_t n_1d_transfers = len / 64;
250 size_t use_dma = (len % 64) == 0 && len > 64;
251 uint8_t *p = (uint8_t *)ptr;
252
253 uint32_t nbytes = len < 64 || !use_dma ? len : 64;
254 while (nbytes--) {
255 *p++ = value;
256 }
257
258 if (use_dma) {
259 snrt_dma_start_2d(ptr, ptr, 64, 64, 0, n_1d_transfers);
260 snrt_dma_wait_all();
261 }
262}
263
272inline snrt_dma_txid_t snrt_dma_load_1d_tile(volatile void *dst,
273 volatile void *src,
274 size_t tile_idx, size_t tile_size,
275 uint32_t prec) {
276 size_t tile_nbytes = tile_size * prec;
277 return snrt_dma_start_1d(
278 (uint64_t)dst, (uint64_t)src + tile_idx * tile_nbytes, tile_nbytes);
279}
280
291 size_t tile_idx,
292 size_t tile_size,
293 uint32_t prec,
294 uint32_t mcast) {
295 size_t tile_nbytes = tile_size * prec;
296 return snrt_dma_start_1d_mcast((uintptr_t)dst,
297 (uintptr_t)src + tile_idx * tile_nbytes,
298 tile_nbytes, mcast);
299}
300
309inline snrt_dma_txid_t snrt_dma_1d_to_2d(volatile void *dst, volatile void *src,
310 size_t size, size_t row_size,
311 size_t stride) {
312 return snrt_dma_start_2d(dst, src, row_size, stride, row_size,
313 size / row_size);
314}
315
324inline snrt_dma_txid_t snrt_dma_2d_to_1d(volatile void *dst, volatile void *src,
325 size_t size, size_t row_size,
326 size_t stride) {
327 return snrt_dma_start_2d(dst, src, row_size, row_size, stride,
328 size / row_size);
329}
330
339inline snrt_dma_txid_t snrt_dma_store_1d_tile(void *dst, void *src,
340 size_t tile_idx, size_t tile_size,
341 uint32_t prec) {
342 size_t tile_nbytes = tile_size * prec;
343 return snrt_dma_start_1d((uint64_t)dst + tile_idx * tile_nbytes,
344 (uint64_t)src, tile_nbytes);
345}
346
363 void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx,
364 size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size,
365 uint32_t prec, size_t tile_ld) {
366 size_t src_offset = 0;
367 // Advance src array in x0 and x1 dimensions, and convert to byte offset
368 src_offset += tile_x0_idx * tile_x0_size;
369 src_offset += tile_x1_idx * tile_x1_size * full_x0_size;
370 src_offset *= prec;
371 // Initiate transfer
372 return snrt_dma_start_2d((uint64_t)dst, // dst
373 (uint64_t)src + src_offset, // src
374 tile_x0_size * prec, // size
375 tile_ld, // dst_stride
376 full_x0_size * prec, // src_stride
377 tile_x1_size // repeat
378 );
379}
380
382 void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx,
383 size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size,
384 uint32_t prec) {
385 return snrt_dma_load_2d_tile(dst, src, tile_x1_idx, tile_x0_idx,
386 tile_x1_size, tile_x0_size, full_x0_size, prec,
387 tile_x0_size * prec);
388}
389
407 void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx,
408 size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size,
409 uint32_t prec, size_t num_banks) {
410 // Calculate new tile size after reshaping the tile in the selected banks
411 size_t tile_x0_size_in_banks = (num_banks * SNRT_TCDM_BANK_WIDTH) / prec;
412 size_t tile_x1_size_in_banks =
413 ceil((tile_x1_size * tile_x0_size) / (double)tile_x0_size_in_banks);
414 size_t tile_ld = SNRT_TCDM_HYPERBANK_WIDTH;
415 return snrt_dma_load_2d_tile(dst, src, tile_x1_idx, tile_x0_idx,
416 tile_x1_size_in_banks, tile_x0_size_in_banks,
417 full_x0_size, prec, tile_ld);
418}
419
436 void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx,
437 size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size,
438 uint32_t prec, size_t tile_ld) {
439 size_t dst_offset = 0;
440 // Advance dst array in x0 and x1 dimensions, and convert to byte offset
441 dst_offset += tile_x0_idx * tile_x0_size;
442 dst_offset += tile_x1_idx * tile_x1_size * full_x0_size;
443 dst_offset *= prec;
444 // Initiate transfer
445 return snrt_dma_start_2d((uint64_t)dst + dst_offset, // dst
446 (uint64_t)src, // src
447 tile_x0_size * prec, // size
448 full_x0_size * prec, // dst_stride
449 tile_ld, // src_stride
450 tile_x1_size // repeat
451 );
452}
453
455 void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx,
456 size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size,
457 uint32_t prec) {
458 return snrt_dma_store_2d_tile(dst, src, tile_x1_idx, tile_x0_idx,
459 tile_x1_size, tile_x0_size, full_x0_size,
460 prec, tile_x0_size * prec);
461}
462
463inline snrt_dma_txid_t snrt_dma_store_2d_tile_from_banks(
464 void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx,
465 size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size,
466 uint32_t prec, size_t num_banks) {
467 // Calculate new tile size after reshaping the tile in the selected banks
468 size_t tile_x0_size_in_banks = (num_banks * SNRT_TCDM_BANK_WIDTH) / prec;
469 size_t tile_x1_size_in_banks =
470 ceil((tile_x1_size * tile_x0_size) / (double)tile_x0_size_in_banks);
471 size_t tile_ld = SNRT_TCDM_HYPERBANK_WIDTH;
472 return snrt_dma_store_2d_tile(dst, src, tile_x1_idx, tile_x0_idx,
473 tile_x1_size_in_banks, tile_x0_size_in_banks,
474 full_x0_size, prec, tile_ld);
475}
void snrt_dma_enable_mcast(uint32_t mask)
Enable multicast for successive transfers.
Definition dma.h:69
snrt_dma_txid_t snrt_dma_load_1d_tile(volatile void *dst, volatile void *src, size_t tile_idx, size_t tile_size, uint32_t prec)
Load a tile of a 1D array.
Definition dma.h:272
snrt_dma_txid_t snrt_dma_1d_to_2d(volatile void *dst, volatile void *src, size_t size, size_t row_size, size_t stride)
Transfer and reshape a 1D array into a 2D array.
Definition dma.h:309
snrt_dma_txid_t snrt_dma_mcast_load_1d_tile(void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec, uint32_t mcast)
Load a tile of a 1D array.
Definition dma.h:290
uint32_t snrt_dma_txid_t
A DMA transfer identifier.
Definition dma.h:15
snrt_dma_txid_t snrt_dma_store_2d_tile(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t tile_ld)
Store a 2D tile to a 2D array.
Definition dma.h:435
snrt_dma_txid_t snrt_dma_load_2d_tile_in_banks(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t num_banks)
Load a 2D tile of a 2D array and reshape it to occupy a subset of TCDM banks.
Definition dma.h:406
snrt_dma_txid_t snrt_dma_store_1d_tile(void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec)
Store a tile to a 1D array.
Definition dma.h:339
snrt_dma_txid_t snrt_dma_2d_to_1d(volatile void *dst, volatile void *src, size_t size, size_t row_size, size_t stride)
Transfer and reshape a 2D array into a 1D array.
Definition dma.h:324
void snrt_dma_wait_all_channels(uint32_t num_channels)
Block until the first num_channels channels are idle.
Definition dma.h:216
void snrt_dma_memset(void *ptr, uint8_t value, uint32_t len)
Fast memset function performed by DMA.
Definition dma.h:245
snrt_dma_txid_t snrt_dma_load_2d_tile(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t tile_ld)
Load a 2D tile of a 2D array.
Definition dma.h:362
void snrt_dma_start_tracking()
Start tracking of a DMA performance region. Does not have any implications on the HW....
Definition dma.h:228
void snrt_dma_disable_mcast()
Disable multicast for successive transfers.
Definition dma.h:77
void snrt_dma_stop_tracking()
Stop tracking of a DMA performance region. Does not have any implications on the HW....
Definition dma.h:236