Snitch Runtime
Loading...
Searching...
No Matches
dma.h
Go to the documentation of this file.
1// Copyright 2023 ETH Zurich and University of Bologna.
2// Licensed under the Apache License, Version 2.0, see LICENSE for details.
3// SPDX-License-Identifier: Apache-2.0
4
10#pragma once
11
12#include <math.h>
13
/**
 * @brief A DMA transfer identifier.
 */
typedef uint32_t snrt_dma_txid_t;
16
/**
 * @brief Start a 1D DMA transfer using 64-bit wide addresses.
 *
 * Both addresses are split into 32-bit halves, programmed through `dmsrc`
 * and `dmdst`, and the copy is launched with `dmcpyi`. The function returns
 * right after issuing these instructions; it does not poll for completion.
 *
 * @param dst Destination address.
 * @param src Source address.
 * @param size Size operand of `dmcpyi` (callers in this file pass a byte
 *             count).
 * @param channel Channel index, encoded in the `dmcpyi` immediate as
 *                `(channel << 2) | 0b00`. It is bound to an immediate asm
 *                operand, so it must resolve to a compile-time constant.
 * @return The value `dmcpyi` writes to its destination register, used as the
 *         transfer ID.
 */
static inline uint32_t snrt_dma_start_1d(uint64_t dst, uint64_t src,
                                         size_t size,
                                         const uint32_t channel = 0) {
    // The DMA instructions take each 64-bit address as two 32-bit registers
    const uint32_t src_lower = (uint32_t)(src & 0xFFFFFFFF);
    const uint32_t src_upper = (uint32_t)(src >> 32);
    const uint32_t dst_lower = (uint32_t)(dst & 0xFFFFFFFF);
    const uint32_t dst_upper = (uint32_t)(dst >> 32);
    uint32_t id;

    asm volatile(
        "dmsrc %[src_lo], %[src_hi] \n"
        "dmdst %[dst_lo], %[dst_hi] \n"
        "dmcpyi %[txid], %[size], (%[channel] << 2) | 0b00 \n"
        : [ txid ] "=r"(id)
        : [ src_lo ] "r"(src_lower), [ src_hi ] "r"(src_upper),
          [ dst_lo ] "r"(dst_lower), [ dst_hi ] "r"(dst_upper),
          [ size ] "r"(size), [ channel ] "i"(channel));

    return id;
}
51
59static inline uint32_t snrt_dma_start_1d(volatile void *dst, volatile void *src,
60 size_t size,
61 const uint32_t channel = 0) {
62 return snrt_dma_start_1d((uint64_t)dst, (uint64_t)src, size, channel);
63}
64
/**
 * @brief Enable multicast for successive transfers.
 *
 * Issues `dmuser` with @p mask as its first operand and `zero` as its
 * second.
 *
 * @param mask Multicast mask handed to `dmuser`.
 */
inline void snrt_dma_enable_mcast(uint32_t mask) {
    asm volatile(
        "dmuser %[mask], zero \n"
        :
        : [ mask ] "r"(mask));
}
72
/**
 * @brief Disable multicast for successive transfers.
 *
 * Issues `dmuser` with `zero` for both operands.
 */
inline void snrt_dma_disable_mcast() {
    asm volatile("dmuser zero, zero \n");
}
78
86static inline uint32_t snrt_dma_start_1d_mcast(uint64_t dst, uint64_t src,
87 size_t size, uint32_t mask,
88 const uint32_t channel = 0) {
90 uint32_t txid = snrt_dma_start_1d(dst, src, size, channel);
92 return txid;
93}
94
104static inline uint32_t snrt_dma_start_1d_mcast(volatile void *dst,
105 volatile void *src, size_t size,
106 uint32_t mask,
107 const uint32_t channel = 0) {
108 return snrt_dma_start_1d_mcast((uint64_t)dst, (uint64_t)src, size, mask,
109 channel);
110}
111
130static inline snrt_dma_txid_t snrt_dma_start_2d(uint64_t dst, uint64_t src,
131 size_t size, size_t dst_stride,
132 size_t src_stride,
133 size_t repeat,
134 const uint32_t channel = 0) {
135 uint32_t dst_lo = dst & 0xFFFFFFFF;
136 uint32_t dst_hi = dst >> 32;
137 uint32_t src_lo = src & 0xFFFFFFFF;
138 uint32_t src_hi = src >> 32;
139 uint32_t txid;
140
141 asm volatile(
142 "dmsrc %[src_lo], %[src_hi] \n"
143 "dmdst %[dst_lo], %[dst_hi] \n"
144 "dmstr %[src_stride], %[dst_stride] \n"
145 "dmrep %[repeat] \n"
146 "dmcpyi %[txid], %[size], (%[channel] << 2) | 0b10 \n"
147 : [ txid ] "=r"(txid)
148 : [ src_lo ] "r"(src_lo), [ src_hi ] "r"(src_hi),
149 [ dst_lo ] "r"(dst_lo), [ dst_hi ] "r"(dst_hi),
150 [ dst_stride ] "r"(dst_stride), [ src_stride ] "r"(src_stride),
151 [ repeat ] "r"(repeat), [ size ] "r"(size), [ channel ] "i"(channel));
152
153 return txid;
154}
155
164static inline uint32_t snrt_dma_start_2d(volatile void *dst, volatile void *src,
165 size_t size, size_t dst_stride,
166 size_t src_stride, size_t repeat,
167 const uint32_t channel = 0) {
168 return snrt_dma_start_2d((uint64_t)dst, (uint64_t)src, size, dst_stride,
169 src_stride, repeat, channel);
170}
171
182static inline void snrt_dma_wait(snrt_dma_txid_t txid,
183 const uint32_t channel = 0) {
184 asm volatile(
185 "1: \n"
186 "dmstati t0, (%[channel] << 2) | 0 \n"
187 "bltu t0, %[txid], 1b \n"
188 :
189 : [ txid ] "r"(txid), [ channel ] "i"(channel)
190 : "t0");
191}
192
/**
 * @brief Block until a channel is idle.
 *
 * Repeatedly reads `dmstati` with selector `(channel << 2) | 2` and loops
 * for as long as the value read is non-zero.
 *
 * @param channel Channel index. It is bound to an immediate asm operand, so
 *                it must resolve to a compile-time constant.
 */
static inline void snrt_dma_wait_all(const uint32_t channel = 0) {
    uint32_t status;  // scratch for the polled value, not used afterwards
    asm volatile(
        "1: \n"
        "dmstati %[busy], (%[channel] << 2) | 2 \n"
        "bne %[busy], zero, 1b \n"
        : [ busy ] "=r"(status)
        : [ channel ] "i"(channel));
}
211
216inline void snrt_dma_wait_all_channels(uint32_t num_channels) {
217 for (int c = 0; c < num_channels; c++) {
218 snrt_dma_wait_all(c);
219 }
220}
221
/**
 * @brief Start tracking of dma performance region. Does not have any
 *        implications on the HW.
 *
 * Emits a single `dmstati zero, 0` instruction whose result is discarded.
 *
 * NOTE(review): snrt_dma_stop_tracking() emits the very same instruction;
 * confirm that start and stop are not meant to use distinct immediates.
 */
inline void snrt_dma_start_tracking() {
    asm volatile("dmstati zero, 0 \n");
}
229
/**
 * @brief Stop tracking of dma performance region. Does not have any
 *        implications on the HW.
 *
 * Emits a single `dmstati zero, 0` instruction whose result is discarded.
 *
 * NOTE(review): snrt_dma_start_tracking() emits the very same instruction;
 * confirm that start and stop are not meant to use distinct immediates.
 */
inline void snrt_dma_stop_tracking() {
    asm volatile("dmstati zero, 0 \n");
}
237
244inline void snrt_dma_memset(void *ptr, uint8_t value, uint32_t len) {
245 // set first 64bytes to value
246 // memset(ptr, value, 64);
247 uint8_t *p = (uint8_t *)ptr;
248 uint32_t nbytes = 64;
249 while (nbytes--) {
250 *p++ = value;
251 }
252
253 // DMA copy the the rest
254 snrt_dma_txid_t memset_txid =
255 snrt_dma_start_2d(ptr, ptr, 64, 64, 0, len / 64);
256 snrt_dma_wait_all();
257}
258
267inline snrt_dma_txid_t snrt_dma_load_1d_tile(volatile void *dst,
268 volatile void *src,
269 size_t tile_idx, size_t tile_size,
270 uint32_t prec) {
271 size_t tile_nbytes = tile_size * prec;
272 return snrt_dma_start_1d(
273 (uint64_t)dst, (uint64_t)src + tile_idx * tile_nbytes, tile_nbytes);
274}
275
286 size_t tile_idx,
287 size_t tile_size,
288 uint32_t prec,
289 uint32_t mcast) {
290 size_t tile_nbytes = tile_size * prec;
291 return snrt_dma_start_1d_mcast((uintptr_t)dst,
292 (uintptr_t)src + tile_idx * tile_nbytes,
293 tile_nbytes, mcast);
294}
295
304inline snrt_dma_txid_t snrt_dma_1d_to_2d(volatile void *dst, volatile void *src,
305 size_t size, size_t row_size,
306 size_t stride) {
307 return snrt_dma_start_2d(dst, src, row_size, stride, row_size,
308 size / row_size);
309}
310
319inline snrt_dma_txid_t snrt_dma_2d_to_1d(volatile void *dst, volatile void *src,
320 size_t size, size_t row_size,
321 size_t stride) {
322 return snrt_dma_start_2d(dst, src, row_size, row_size, stride,
323 size / row_size);
324}
325
334inline snrt_dma_txid_t snrt_dma_store_1d_tile(void *dst, void *src,
335 size_t tile_idx, size_t tile_size,
336 uint32_t prec) {
337 size_t tile_nbytes = tile_size * prec;
338 return snrt_dma_start_1d((uint64_t)dst + tile_idx * tile_nbytes,
339 (uint64_t)src, tile_nbytes);
340}
341
358 void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx,
359 size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size,
360 uint32_t prec, size_t tile_ld) {
361 size_t src_offset = 0;
362 // Advance src array in x0 and x1 dimensions, and convert to byte offset
363 src_offset += tile_x0_idx * tile_x0_size;
364 src_offset += tile_x1_idx * tile_x1_size * full_x0_size;
365 src_offset *= prec;
366 // Initiate transfer
367 return snrt_dma_start_2d((uint64_t)dst, // dst
368 (uint64_t)src + src_offset, // src
369 tile_x0_size * prec, // size
370 tile_ld, // dst_stride
371 full_x0_size * prec, // src_stride
372 tile_x1_size // repeat
373 );
374}
375
377 void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx,
378 size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size,
379 uint32_t prec) {
380 return snrt_dma_load_2d_tile(dst, src, tile_x1_idx, tile_x0_idx,
381 tile_x1_size, tile_x0_size, full_x0_size, prec,
382 tile_x0_size * prec);
383}
384
402 void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx,
403 size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size,
404 uint32_t prec, size_t num_banks) {
405 // Calculate new tile size after reshaping the tile in the selected banks
406 size_t tile_x0_size_in_banks = (num_banks * SNRT_TCDM_BANK_WIDTH) / prec;
407 size_t tile_x1_size_in_banks =
408 ceil((tile_x1_size * tile_x0_size) / (double)tile_x0_size_in_banks);
409 size_t tile_ld = SNRT_TCDM_HYPERBANK_WIDTH;
410 return snrt_dma_load_2d_tile(dst, src, tile_x1_idx, tile_x0_idx,
411 tile_x1_size_in_banks, tile_x0_size_in_banks,
412 full_x0_size, prec, tile_ld);
413}
414
431 void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx,
432 size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size,
433 uint32_t prec, size_t tile_ld) {
434 size_t dst_offset = 0;
435 // Advance dst array in x0 and x1 dimensions, and convert to byte offset
436 dst_offset += tile_x0_idx * tile_x0_size;
437 dst_offset += tile_x1_idx * tile_x1_size * full_x0_size;
438 dst_offset *= prec;
439 // Initiate transfer
440 return snrt_dma_start_2d((uint64_t)dst + dst_offset, // dst
441 (uint64_t)src, // src
442 tile_x0_size * prec, // size
443 full_x0_size * prec, // dst_stride
444 tile_ld, // src_stride
445 tile_x1_size // repeat
446 );
447}
448
450 void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx,
451 size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size,
452 uint32_t prec) {
453 return snrt_dma_store_2d_tile(dst, src, tile_x1_idx, tile_x0_idx,
454 tile_x1_size, tile_x0_size, full_x0_size,
455 prec, tile_x0_size * prec);
456}
457
458inline snrt_dma_txid_t snrt_dma_store_2d_tile_from_banks(
459 void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx,
460 size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size,
461 uint32_t prec, size_t num_banks) {
462 // Calculate new tile size after reshaping the tile in the selected banks
463 size_t tile_x0_size_in_banks = (num_banks * SNRT_TCDM_BANK_WIDTH) / prec;
464 size_t tile_x1_size_in_banks =
465 ceil((tile_x1_size * tile_x0_size) / (double)tile_x0_size_in_banks);
466 size_t tile_ld = SNRT_TCDM_HYPERBANK_WIDTH;
467 return snrt_dma_store_2d_tile(dst, src, tile_x1_idx, tile_x0_idx,
468 tile_x1_size_in_banks, tile_x0_size_in_banks,
469 full_x0_size, prec, tile_ld);
470}
void snrt_dma_enable_mcast(uint32_t mask)
Enable multicast for successive transfers.
Definition dma.h:69
snrt_dma_txid_t snrt_dma_load_1d_tile(volatile void *dst, volatile void *src, size_t tile_idx, size_t tile_size, uint32_t prec)
Load a tile of a 1D array.
Definition dma.h:267
snrt_dma_txid_t snrt_dma_1d_to_2d(volatile void *dst, volatile void *src, size_t size, size_t row_size, size_t stride)
Transfer and reshape a 1D array into a 2D array.
Definition dma.h:304
snrt_dma_txid_t snrt_dma_mcast_load_1d_tile(void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec, uint32_t mcast)
Load a tile of a 1D array.
Definition dma.h:285
uint32_t snrt_dma_txid_t
A DMA transfer identifier.
Definition dma.h:15
snrt_dma_txid_t snrt_dma_store_2d_tile(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t tile_ld)
Store a 2D tile to a 2D array.
Definition dma.h:430
snrt_dma_txid_t snrt_dma_load_2d_tile_in_banks(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t num_banks)
Load a 2D tile of a 2D array and reshape it to occupy a subset of TCDM banks.
Definition dma.h:401
snrt_dma_txid_t snrt_dma_store_1d_tile(void *dst, void *src, size_t tile_idx, size_t tile_size, uint32_t prec)
Store a tile to a 1D array.
Definition dma.h:334
snrt_dma_txid_t snrt_dma_2d_to_1d(volatile void *dst, volatile void *src, size_t size, size_t row_size, size_t stride)
Transfer and reshape a 2D array into a 1D array.
Definition dma.h:319
void snrt_dma_wait_all_channels(uint32_t num_channels)
Block until the first num_channels channels are idle.
Definition dma.h:216
void snrt_dma_memset(void *ptr, uint8_t value, uint32_t len)
Fast memset function performed by DMA.
Definition dma.h:244
snrt_dma_txid_t snrt_dma_load_2d_tile(void *dst, void *src, size_t tile_x1_idx, size_t tile_x0_idx, size_t tile_x1_size, size_t tile_x0_size, size_t full_x0_size, uint32_t prec, size_t tile_ld)
Load a 2D tile of a 2D array.
Definition dma.h:357
void snrt_dma_start_tracking()
Start tracking of dma performance region. Does not have any implications on the HW....
Definition dma.h:228
void snrt_dma_disable_mcast()
Disable multicast for successive transfers.
Definition dma.h:77
void snrt_dma_stop_tracking()
Stop tracking of dma performance region. Does not have any implications on the HW....
Definition dma.h:236