dm.h
// Copyright 2021 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

#ifndef DM_H
#define DM_H

//================================================================================
// Settings
//================================================================================

// #define DM_USE_GLOBAL_CLINT

#define DM_TASK_QUEUE_SIZE 4

//================================================================================
// Macros
//================================================================================

#define _dm_mtx_lock() snrt_mutex_acquire(&dm_p->mutex)
#define _dm_mtx_release() snrt_mutex_release(&dm_p->mutex)

#define DM_STATUS_COMPLETE_ID 0
#define DM_STATUS_NEXT_ID 1
#define DM_STATUS_BUSY 2
#define DM_STATUS_WOULD_BLOCK 3

//================================================================================
// Debug
//================================================================================

// #define DM_DEBUG_LEVEL 100

#ifdef DM_DEBUG_LEVEL
#include "printf.h"
#define _DM_PRINTF(...) \
    do { \
        printf("[dm] " __VA_ARGS__); \
    } while (0)
#define DM_PRINTF(d, ...) \
    do { \
        if (DM_DEBUG_LEVEL >= d) { \
            _DM_PRINTF(__VA_ARGS__); \
        } \
    } while (0)
#else
#define DM_PRINTF(d, ...)
#endif

//================================================================================
// Types
//================================================================================

typedef struct {
    uint64_t src;
    uint64_t dst;
    uint32_t size;
    uint32_t sstrd;
    uint32_t dstrd;
    uint32_t nreps;
    uint32_t cfg;
    uint32_t twod;
} dm_task_t;

// Used for ultra-fine-grained communication with the DM core:
// writing a command to stat_q issues a request (0 means no command pending);
// the response is placed in stat_p and is valid iff stat_pvalid is non-zero.
typedef enum en_stat {
    // commands the DM core to wait until all transfers are complete
    STAT_WAIT_IDLE = 1,
    // abort and exit
    STAT_EXIT = 2,
    // poll if DM is ready
    STAT_READY = 3,
} en_stat_t;
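
// A minimal sketch of the request/response handshake these commands follow,
// mirroring dm_wait()/dm_wait_ready() further below: the requesting core
// clears stat_pvalid, writes the command to stat_q, wakes the DM core, and
// spins until the DM core acknowledges by setting stat_pvalid.
//
//     _dm_mtx_lock();
//     dm_p->stat_pvalid = 0;
//     dm_p->stat_q = STAT_READY;      // issue the request
//     wake_dm();
//     while (!dm_p->stat_pvalid)      // response valid once non-zero
//         ;
//     _dm_mtx_release();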

typedef struct {
    dm_task_t queue[DM_TASK_QUEUE_SIZE];
    uint32_t queue_back;
    uint32_t queue_front;
    volatile uint32_t queue_fill;
    volatile uint32_t mutex;
    volatile en_stat_t stat_q;
    volatile uint32_t stat_p;
    volatile uint32_t stat_pvalid;
    volatile uint32_t dm_wfi;
} dm_t;

//================================================================================
// Data
//================================================================================

extern __thread volatile dm_t *dm_p;
extern volatile dm_t *volatile dm_p_global;

//================================================================================
// Functions
//================================================================================

#ifdef DM_USE_GLOBAL_CLINT
inline void wfi_dm(uint32_t cluster_core_idx) {
    (void)cluster_core_idx;
    snrt_int_sw_poll();
}
inline void wake_dm(void) {
    uint32_t basehart = snrt_cluster_core_base_hartid();
    snrt_int_sw_set(basehart + snrt_cluster_dm_core_idx());
}
#else
inline void wfi_dm(uint32_t cluster_core_idx) {
    __atomic_add_fetch(&dm_p->dm_wfi, 1, __ATOMIC_RELAXED);
    snrt_wfi();
    snrt_int_cluster_clr(1 << cluster_core_idx);
    __atomic_add_fetch(&dm_p->dm_wfi, -1, __ATOMIC_RELAXED);
}
inline void wake_dm(void) {
    // wait for DM to sleep before sending wakeup
    while (!__atomic_load_n(&dm_p->dm_wfi, __ATOMIC_RELAXED))
        ;
    snrt_int_cluster_set(1 << snrt_cluster_compute_core_num());
}
#endif // #ifdef DM_USE_GLOBAL_CLINT

inline void dm_init(void) {
    // create a data mover instance
    if (snrt_is_dm_core()) {
#ifdef DM_USE_GLOBAL_CLINT
        snrt_interrupt_enable(IRQ_M_SOFT);
#else
        snrt_interrupt_enable(IRQ_M_CLUSTER);
#endif
        dm_p = (dm_t *)snrt_l1_alloc(sizeof(dm_t));
        snrt_memset((void *)dm_p, 0, sizeof(dm_t));
        dm_p_global = dm_p;
    } else {
        while (!dm_p_global)
            ;
        dm_p = dm_p_global;
    }
}
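
// Usage sketch (an assumption, not mandated by this header): every cluster
// core calls dm_init() once during startup, before any other dm_* call. The
// DM core allocates and publishes the shared dm_t; all other cores spin on
// dm_p_global until it appears, so no explicit barrier is strictly required.
//
//     dm_init();                  // all cluster cores
//     snrt_cluster_hw_barrier();  // optional: align cores after setup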

inline void dm_main(void) {
    volatile dm_task_t *t;
    uint32_t do_exit = 0;
    uint32_t cluster_core_idx = snrt_cluster_core_idx();

    DM_PRINTF(10, "enter main\n");

    while (!do_exit) {
        if (dm_p->queue_fill) {
            // wait until DMA is ready
            while (__builtin_sdma_stat(DM_STATUS_WOULD_BLOCK))
                ;

            t = &dm_p->queue[dm_p->queue_back];

            if (t->twod) {
                DM_PRINTF(10, "start twod\n");
                __builtin_sdma_start_twod(t->src, t->dst, t->size, t->sstrd,
                                          t->dstrd, t->nreps, t->cfg);
            } else {
                DM_PRINTF(10, "start oned\n");
                __builtin_sdma_start_oned(t->src, t->dst, t->size, t->cfg);
            }

            // bump
            dm_p->queue_back = (dm_p->queue_back + 1) % DM_TASK_QUEUE_SIZE;
            __atomic_add_fetch(&dm_p->queue_fill, -1, __ATOMIC_RELAXED);
        }

        if (dm_p->stat_q) {
            switch (dm_p->stat_q) {
                case STAT_WAIT_IDLE:
                    // check status and set pvalid if DMA is idle and clear
                    // request
                    if (__builtin_sdma_stat(DM_STATUS_BUSY) == 0) {
                        DM_PRINTF(50, "idle\n");
                        dm_p->stat_pvalid = 1;
                        dm_p->stat_q = 0;
                    }
                    break;
                case STAT_EXIT:
                    do_exit = 1;
                    break;
                case STAT_READY:
                    DM_PRINTF(50, "ready\n");
                    dm_p->stat_pvalid = 1;
                    dm_p->stat_q = 0;
                    break;
            }
        }

        // sleep if queue is empty and no stats pending
        if (!dm_p->queue_fill && !dm_p->stat_q) {
            wfi_dm(cluster_core_idx);
        }
    }
    DM_PRINTF(10, "dm: exit\n");
#ifdef DM_USE_GLOBAL_CLINT
    snrt_interrupt_disable(IRQ_M_SOFT);
#else
    snrt_interrupt_disable(IRQ_M_CLUSTER);
#endif
    return;
}

inline void dm_exit(void) {
    dm_p->stat_q = STAT_EXIT;
    // signal data mover
    wake_dm();
}
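
// Lifecycle sketch (hypothetical cluster entry point, not part of this
// header): the DM core runs dm_main() until a compute core posts STAT_EXIT
// via dm_exit(); compute cores queue transfers in between.
//
//     int main(void) {
//         dm_init();                      // all cores
//         if (snrt_is_dm_core()) {
//             dm_main();                  // returns after dm_exit()
//             return 0;
//         }
//         // ... compute: dm_memcpy_async(), dm_wait(), ...
//         snrt_cluster_hw_barrier();      // all copies queued before teardown
//         if (snrt_cluster_core_idx() == 0) dm_exit();
//         return 0;
//     }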

inline void dm_memcpy_async(void *dest, const void *src, size_t n) {
    uint32_t s;
    volatile dm_task_t *t;

    DM_PRINTF(10, "dm_memcpy_async %#x -> %#x size %d\n", src, dest,
              (uint32_t)n);

    // poll queue size
    do {
        s = __atomic_load_n(&dm_p->queue_fill, __ATOMIC_RELAXED);
    } while (s >= DM_TASK_QUEUE_SIZE);
    _dm_mtx_lock();

    // insert
    t = &dm_p->queue[dm_p->queue_front];
    t->src = (uint64_t)src;
    t->dst = (uint64_t)dest;
    t->size = (uint32_t)n;
    t->twod = 0;
    t->cfg = 0;

    // bump
    __atomic_add_fetch(&dm_p->queue_fill, 1, __ATOMIC_RELAXED);
    dm_p->queue_front = (dm_p->queue_front + 1) % DM_TASK_QUEUE_SIZE;

    _dm_mtx_release();
}
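
// Usage sketch (hypothetical buffers; assumes dm_init() has run and the DM
// core is executing dm_main()): queue a copy into L1 and block until the
// DMA has drained.
//
//     extern char src_l2[1024];              // hypothetical source buffer
//     char *dst_l1 = snrt_l1_alloc(1024);    // destination in L1 TCDM
//     dm_memcpy_async(dst_l1, src_l2, 1024);
//     dm_wait();                             // returns once the DMA is idle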

inline void dm_memcpy2d_async(uint64_t src, uint64_t dst, uint32_t size,
                              uint32_t sstrd, uint32_t dstrd, uint32_t nreps,
                              uint32_t cfg) {
    uint32_t s;
    volatile dm_task_t *t;

    DM_PRINTF(10, "dm_memcpy2d_async %#x -> %#x size %d\n", src, dst,
              (uint32_t)size);

    // poll queue size
    do {
        s = __atomic_load_n(&dm_p->queue_fill, __ATOMIC_RELAXED);
    } while (s >= DM_TASK_QUEUE_SIZE);
    _dm_mtx_lock();

    // insert
    t = &dm_p->queue[dm_p->queue_front];
    t->src = src;
    t->dst = dst;
    t->size = size;
    t->sstrd = sstrd;
    t->dstrd = dstrd;
    t->nreps = nreps;
    t->twod = 1;
    t->cfg = cfg;

    // bump
    __atomic_add_fetch(&dm_p->queue_fill, 1, __ATOMIC_RELAXED);
    dm_p->queue_front = (dm_p->queue_front + 1) % DM_TASK_QUEUE_SIZE;

    _dm_mtx_release();
}
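
// Usage sketch (hypothetical sizes; assumes size is the byte count of one
// inner 1D burst, sstrd/dstrd the byte strides between consecutive bursts,
// and nreps the number of bursts): gather a 32-row x 64-byte tile out of a
// row-major matrix with a 256-byte row pitch into a packed L1 buffer.
//
//     char *tile_l1 = snrt_l1_alloc(32 * 64);
//     dm_memcpy2d_async((uint64_t)(uintptr_t)matrix_l2,  // hypothetical src
//                       (uint64_t)(uintptr_t)tile_l1,
//                       64,    // bytes per tile row
//                       256,   // source stride: full matrix row pitch
//                       64,    // destination stride: packed rows
//                       32,    // number of rows
//                       0);    // default cfg
//     dm_wait();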

inline void dm_start(void) { wake_dm(); }

inline void dm_wait(void) {
    uint32_t s;

    // signal data mover
    wake_dm();

    // first, wait for the DM queue to drain and for any pending request to
    // clear
    do {
        s = __atomic_load_n(&dm_p->queue_fill, __ATOMIC_RELAXED);
    } while (s != 0);
    while (dm_p->stat_q)
        ;

    // then, issue the STAT_WAIT_IDLE request so the DM core polls for the DMA
    // to be idle
    _dm_mtx_lock();
    dm_p->stat_pvalid = 0;
    // this is the request
    dm_p->stat_q = STAT_WAIT_IDLE;
    // signal data mover
    wake_dm();
    // once stat_pvalid is non-zero, the DMA has completed all transfers
    while (!dm_p->stat_pvalid)
        ;
    _dm_mtx_release();
}
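
// Overlap sketch (hypothetical helpers buf, tile_src, compute_tile and
// constants TILE_BYTES, n_tiles): dm_memcpy_async() plus dm_wait() supports
// simple double buffering, prefetching the next tile while the current one
// is processed.
//
//     dm_memcpy_async(buf[0], tile_src(0), TILE_BYTES);
//     dm_wait();
//     for (uint32_t i = 0; i < n_tiles; i++) {
//         if (i + 1 < n_tiles)  // prefetch next tile into the other buffer
//             dm_memcpy_async(buf[(i + 1) % 2], tile_src(i + 1), TILE_BYTES);
//         compute_tile(buf[i % 2]);   // hypothetical per-tile compute
//         dm_wait();                  // prefetch must land before next round
//     }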

inline void dm_wait_ready(void) {
    _dm_mtx_lock();
    dm_p->stat_pvalid = 0;
    dm_p->stat_q = STAT_READY;
    wake_dm();
    while (!dm_p->stat_pvalid)
        ;
    _dm_mtx_release();
}

#endif /* DM_H */