Snitch Runtime
dm.h
// Copyright 2021 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

#ifndef DM_H
#define DM_H

#include <stddef.h>
#include <stdint.h>

//================================================================================
// Settings
//================================================================================

// Define to use the global CLINT for wakeups instead of cluster-local
// interrupts.
// #define DM_USE_GLOBAL_CLINT

// Maximum number of outstanding transfers the task queue can hold.
#define DM_TASK_QUEUE_SIZE 4

//================================================================================
// Macros
//================================================================================

#define _dm_mtx_lock() snrt_mutex_acquire(&dm_p->mutex)
#define _dm_mtx_release() snrt_mutex_release(&dm_p->mutex)

// Status queries accepted by the __builtin_sdma_stat() intrinsic
#define DM_STATUS_COMPLETE_ID 0
#define DM_STATUS_NEXT_ID 1
#define DM_STATUS_BUSY 2
#define DM_STATUS_WOULD_BLOCK 3

//================================================================================
// Debug
//================================================================================

// #define DM_DEBUG_LEVEL 100

#ifdef DM_DEBUG_LEVEL
#include "printf.h"
#define _DM_PRINTF(...)              \
    do {                             \
        printf("[dm] " __VA_ARGS__); \
    } while (0)
#define DM_PRINTF(d, ...)            \
    do {                             \
        if (DM_DEBUG_LEVEL >= d) {   \
            _DM_PRINTF(__VA_ARGS__); \
        }                            \
    } while (0)
#else
#define DM_PRINTF(d, ...)
#endif

//================================================================================
// Types
//================================================================================

// Descriptor for a single DMA transfer task
typedef struct {
    uint64_t src;    // source address
    uint64_t dst;    // destination address
    uint32_t size;   // transfer size in bytes (per repetition for 2D)
    uint32_t sstrd;  // source stride (2D transfers only)
    uint32_t dstrd;  // destination stride (2D transfers only)
    uint32_t nreps;  // number of repetitions (2D transfers only)
    uint32_t cfg;    // DMA configuration flags
    uint32_t twod;   // non-zero for a 2D (strided) transfer
} dm_task_t;

// Used for ultra-fine-grained communication with the DM core:
// stat_q is written to request a command (0 means no command pending);
// the response is put into stat_p and is valid iff stat_pvalid is non-zero.
typedef enum en_stat {
    // command the DM core to wait until all transfers are complete
    STAT_WAIT_IDLE = 1,
    // abort and exit the main loop
    STAT_EXIT = 2,
    // poll whether the DM core is ready
    STAT_READY = 3,
} en_stat_t;
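
// A minimal caller-side sketch of this request/response handshake (the same
// pattern dm_wait_ready() below uses; the requests in this file signal their
// response through stat_pvalid only and leave stat_p unused):
//
//     dm_p->stat_pvalid = 0;      // invalidate any previous response
//     dm_p->stat_q = STAT_READY;  // post the request
//     wake_dm();                  // make sure the DM core is awake
//     while (!dm_p->stat_pvalid)  // response is valid once pvalid != 0
//         ;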

// State shared between the DM core and the compute cores
typedef struct {
    dm_task_t queue[DM_TASK_QUEUE_SIZE];  // circular task queue
    uint32_t queue_back;                  // consumer index (DM core)
    uint32_t queue_front;                 // producer index (compute cores)
    volatile uint32_t queue_fill;         // number of queued tasks
    volatile uint32_t mutex;              // serializes producer-side access
    volatile en_stat_t stat_q;            // pending status request, 0 if none
    volatile uint32_t stat_p;             // status response payload
    volatile uint32_t stat_pvalid;        // response is valid iff non-zero
    volatile uint32_t dm_wfi;             // non-zero while the DM core sleeps
} dm_t;
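
// The queue is a ring buffer: compute cores enqueue at queue_front
// (serialized by the mutex), the DM core dequeues at queue_back, and both
// indices wrap modulo DM_TASK_QUEUE_SIZE:
//
//     queue_front = (queue_front + 1) % DM_TASK_QUEUE_SIZE;  // producer bump
//     queue_back = (queue_back + 1) % DM_TASK_QUEUE_SIZE;    // consumer bump
//
// queue_fill is updated with atomic add/sub so producers can poll for a free
// slot without holding the mutex.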

//================================================================================
// Data
//================================================================================

// Per-thread pointer to the cluster-local data-mover instance
extern __thread volatile dm_t *dm_p;
// Pointer through which the DM core publishes its instance during dm_init()
extern volatile dm_t *volatile dm_p_global;

//================================================================================
// Functions
//================================================================================

#ifdef DM_USE_GLOBAL_CLINT
static inline void wfi_dm(uint32_t cluster_core_idx) {
    (void)cluster_core_idx;
    snrt_int_sw_poll();
}
static inline void wake_dm(void) {
    uint32_t basehart = snrt_cluster_core_base_hartid();
    snrt_int_sw_set(basehart + snrt_cluster_dm_core_idx());
}
#else
static inline void wfi_dm(uint32_t cluster_core_idx) {
    // advertise that this core is about to sleep, then wait for an interrupt
    __atomic_add_fetch(&dm_p->dm_wfi, 1, __ATOMIC_RELAXED);
    snrt_wfi();
    snrt_int_cluster_clr(1 << cluster_core_idx);
    __atomic_add_fetch(&dm_p->dm_wfi, -1, __ATOMIC_RELAXED);
}
static inline void wake_dm(void) {
    // wait for the DM core to sleep before sending the wakeup, so the
    // cluster interrupt cannot be lost
    while (!__atomic_load_n(&dm_p->dm_wfi, __ATOMIC_RELAXED))
        ;
    snrt_int_cluster_set(1 << snrt_cluster_compute_core_num());
}
#endif  // DM_USE_GLOBAL_CLINT

// Initialize the data mover: the DM core allocates and publishes the shared
// dm_t instance; all other cores spin until it is available.
inline void dm_init(void) {
    // create a data mover instance
    if (snrt_is_dm_core()) {
#ifdef DM_USE_GLOBAL_CLINT
        snrt_interrupt_enable(IRQ_M_SOFT);
#else
        snrt_interrupt_enable(IRQ_M_CLUSTER);
#endif
        dm_p = (dm_t *)snrt_l1_alloc(sizeof(dm_t));
        snrt_dma_memset((void *)dm_p, 0, sizeof(dm_t));
        dm_p_global = dm_p;
    } else {
        while (!dm_p_global)
            ;
        dm_p = dm_p_global;
    }
}
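
// A minimal sketch of the intended startup flow, assuming an snRuntime
// program whose main() runs on every core of the cluster (issue_copies() is
// a hypothetical placeholder for compute-core work):
//
//     int main(void) {
//         dm_init();                  // all cores: publish/obtain dm_p
//         if (snrt_is_dm_core()) {
//             dm_main();              // DM core: serve the queue...
//             return 0;               // ...until dm_exit() is requested
//         }
//         issue_copies();             // compute cores: use the DM API
//         snrt_cluster_hw_barrier();  // wait for all compute cores
//         if (snrt_cluster_core_idx() == 0) dm_exit();
//         return 0;
//     }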

// Main loop of the DM core: start queued transfers, answer status requests,
// and sleep when there is nothing to do.
inline void dm_main(void) {
#ifdef SNRT_SUPPORTS_DMA
    volatile dm_task_t *t;
    uint32_t do_exit = 0;
    uint32_t cluster_core_idx = snrt_cluster_core_idx();

    DM_PRINTF(10, "enter main\n");

    while (!do_exit) {
        // first, start any queued transfer
        if (dm_p->queue_fill) {
            // wait until the DMA engine can accept a new transfer
            while (__builtin_sdma_stat(DM_STATUS_WOULD_BLOCK))
                ;

            t = &dm_p->queue[dm_p->queue_back];

            if (t->twod) {
                DM_PRINTF(10, "start twod\n");
                __builtin_sdma_start_twod(t->src, t->dst, t->size, t->sstrd,
                                          t->dstrd, t->nreps, t->cfg);
            } else {
                DM_PRINTF(10, "start oned\n");
                __builtin_sdma_start_oned(t->src, t->dst, t->size, t->cfg);
            }

            // bump the consumer index and release the queue slot
            dm_p->queue_back = (dm_p->queue_back + 1) % DM_TASK_QUEUE_SIZE;
            __atomic_add_fetch(&dm_p->queue_fill, -1, __ATOMIC_RELAXED);
        }

        // then, serve a pending status request, if any
        if (dm_p->stat_q) {
            switch (dm_p->stat_q) {
                case STAT_WAIT_IDLE:
                    // check status and set pvalid if the DMA is idle, then
                    // clear the request
                    if (__builtin_sdma_stat(DM_STATUS_BUSY) == 0) {
                        DM_PRINTF(50, "idle\n");
                        dm_p->stat_pvalid = 1;
                        dm_p->stat_q = (en_stat_t)0;
                    }
                    break;
                case STAT_EXIT:
                    do_exit = 1;
                    break;
                case STAT_READY:
                    DM_PRINTF(50, "ready\n");
                    dm_p->stat_pvalid = 1;
                    dm_p->stat_q = (en_stat_t)0;
                    break;
            }
        }

        // sleep if the queue is empty and no status request is pending
        if (!dm_p->queue_fill && !dm_p->stat_q) {
            wfi_dm(cluster_core_idx);
        }
    }
    DM_PRINTF(10, "dm: exit\n");
#endif  // SNRT_SUPPORTS_DMA
#ifdef DM_USE_GLOBAL_CLINT
    snrt_interrupt_disable(IRQ_M_SOFT);
#else
    snrt_interrupt_disable(IRQ_M_CLUSTER);
#endif
}

// Request the DM core to exit its main loop.
inline void dm_exit(void) {
    dm_p->stat_q = STAT_EXIT;
    // signal the data mover
    wake_dm();
}

// Enqueue an asynchronous 1D copy of n bytes from src to dest.
inline void dm_memcpy_async(void *dest, const void *src, size_t n) {
    uint32_t s;
    volatile dm_task_t *t;

    DM_PRINTF(10, "dm_memcpy_async %#x -> %#x size %d\n", src, dest,
              (uint32_t)n);

    // wait for a free slot in the task queue
    do {
        s = __atomic_load_n(&dm_p->queue_fill, __ATOMIC_RELAXED);
    } while (s >= DM_TASK_QUEUE_SIZE);
    _dm_mtx_lock();

    // insert the task
    t = &dm_p->queue[dm_p->queue_front];
    t->src = (uint64_t)(uintptr_t)src;
    t->dst = (uint64_t)(uintptr_t)dest;
    t->size = (uint32_t)n;
    t->twod = 0;
    t->cfg = 0;

    // bump the producer index
    __atomic_add_fetch(&dm_p->queue_fill, 1, __ATOMIC_RELAXED);
    dm_p->queue_front = (dm_p->queue_front + 1) % DM_TASK_QUEUE_SIZE;

    _dm_mtx_release();
}
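
// A minimal usage sketch from a compute core (dst_l1 and src_l3 are
// hypothetical buffer pointers):
//
//     dm_memcpy_async(dst_l1, src_l3, 1024);  // enqueue a 1 KiB copy
//     /* ... do independent work while the transfer runs ... */
//     dm_wait();                              // block until it has landed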

// Enqueue an asynchronous 2D (strided) copy: nreps blocks of size bytes each,
// advancing the source by sstrd and the destination by dstrd between blocks.
inline void dm_memcpy2d_async(uint64_t src, uint64_t dst, uint32_t size,
                              uint32_t sstrd, uint32_t dstrd, uint32_t nreps,
                              uint32_t cfg) {
    uint32_t s;
    volatile dm_task_t *t;

    DM_PRINTF(10, "dm_memcpy2d_async %#x -> %#x size %d\n", src, dst, size);

    // wait for a free slot in the task queue
    do {
        s = __atomic_load_n(&dm_p->queue_fill, __ATOMIC_RELAXED);
    } while (s >= DM_TASK_QUEUE_SIZE);
    _dm_mtx_lock();

    // insert the task
    t = &dm_p->queue[dm_p->queue_front];
    t->src = src;
    t->dst = dst;
    t->size = size;
    t->sstrd = sstrd;
    t->dstrd = dstrd;
    t->nreps = nreps;
    t->twod = 1;
    t->cfg = cfg;

    // bump the producer index
    __atomic_add_fetch(&dm_p->queue_fill, 1, __ATOMIC_RELAXED);
    dm_p->queue_front = (dm_p->queue_front + 1) % DM_TASK_QUEUE_SIZE;

    _dm_mtx_release();
}
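
// A minimal sketch gathering an 8-row tile of 16 bytes per row out of a
// matrix with a 256-byte row pitch into a dense L1 buffer (matrix and tile
// are hypothetical pointers; stride semantics as described above):
//
//     dm_memcpy2d_async((uint64_t)(uintptr_t)matrix,  // src
//                       (uint64_t)(uintptr_t)tile,    // dst
//                       16,                           // bytes per row
//                       256,                          // source stride
//                       16,                           // destination stride
//                       8,                            // repetitions (rows)
//                       0);                           // default cfg
//     dm_wait();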

// Wake the DM core so it starts processing queued transfers.
inline void dm_start(void) { wake_dm(); }

// Block until the task queue is drained and the DMA engine is idle.
inline void dm_wait(void) {
    uint32_t s;

    // signal the data mover
    wake_dm();

    // first, wait for the queue to be empty and for no request to be pending
    do {
        s = __atomic_load_n(&dm_p->queue_fill, __ATOMIC_RELAXED);
    } while (s != 0);
    while (dm_p->stat_q)
        ;

    // then, issue a STAT_WAIT_IDLE request so the DM core polls the DMA
    // engine for idleness
    _dm_mtx_lock();
    dm_p->stat_pvalid = 0;
    // this is the request
    dm_p->stat_q = STAT_WAIT_IDLE;
    // signal the data mover
    wake_dm();
    // once stat_pvalid is non-zero, the DMA has completed all transfers
    while (!dm_p->stat_pvalid)
        ;
    _dm_mtx_release();
}
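
// A minimal double-buffering sketch overlapping the next transfer with
// compute (CHUNK, n_chunks, src_chunk() and compute() are hypothetical):
//
//     dm_memcpy_async(buf[0], src_chunk(0), CHUNK);  // prefetch chunk 0
//     for (uint32_t i = 0; i < n_chunks; i++) {
//         dm_wait();                                 // chunk i has landed
//         if (i + 1 < n_chunks)
//             dm_memcpy_async(buf[(i + 1) % 2], src_chunk(i + 1), CHUNK);
//         compute(buf[i % 2]);                       // runs during the copy
//     }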

// Block until the DM core is in its main loop and responding to requests.
inline void dm_wait_ready(void) {
    _dm_mtx_lock();
    dm_p->stat_pvalid = 0;
    dm_p->stat_q = STAT_READY;
    wake_dm();
    while (!dm_p->stat_pvalid)
        ;
    _dm_mtx_release();
}

#endif /* DM_H */