Snitch Runtime
eu.h
// Copyright 2021 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

#ifndef EU_H
#define EU_H

#include <string.h>

#include "eu_decls.h"

//================================================================================
// Settings
//================================================================================
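
// Define EU_USE_GLOBAL_CLINT to wake sleeping workers through the global CLINT
// software interrupt; leave it undefined to use the cluster-local interrupt
// controller instead (see the two variants of wake_workers() and worker_wfi()
// below).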
// #define EU_USE_GLOBAL_CLINT

//================================================================================
// Debug
//================================================================================

#ifdef EU_DEBUG_LEVEL
#include "printf.h"
#define _EU_PRINTF(...)             \
    if (1) {                        \
        printf("[eu] "__VA_ARGS__); \
    }
#define EU_PRINTF(d, ...)        \
    if (EU_DEBUG_LEVEL >= d) {   \
        _EU_PRINTF(__VA_ARGS__); \
    }
#else
#define EU_PRINTF(d, ...)
#endif

//================================================================================
// Data
//================================================================================

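/**
 * @brief Pointer to the event unit (EU) struct, kept in thread-local storage
 * so each core can access it without an extra indirection.
 */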
extern __thread volatile eu_t *eu_p;

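/**
 * @brief Global pointer to the EU struct. Written once by the allocating core
 * in eu_init() and read by all other cores to initialize their thread-local
 * eu_p.
 */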
extern volatile eu_t *volatile eu_p_global;

//================================================================================
// Functions
//================================================================================

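/**
 * @brief Spin until the number of workers waiting in WFI equals the number of
 * workers currently in the event loop.
 */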
static inline void wait_worker_wfi(void) {
    uint32_t scratch = eu_p->workers_in_loop;
    while (__atomic_load_n(&eu_p->workers_wfi, __ATOMIC_RELAXED) != scratch)
        ;
}

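// wake_workers() and worker_wfi() come in two variants: one based on the
// global CLINT software interrupt (EU_USE_GLOBAL_CLINT) and one based on the
// cluster-local interrupt controller.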
#ifdef EU_USE_GLOBAL_CLINT

static inline void wake_workers(void) {
#ifdef OMPSTATIC_NUMTHREADS
#define WAKE_MASK (((1 << OMPSTATIC_NUMTHREADS) - 1) & ~0x1)
    // Fast wake-up for static number of worker threads
    uint32_t basehart = snrt_cluster_core_base_hartid();
    if ((basehart % 32) + OMPSTATIC_NUMTHREADS > 32) {
        // wake-up is split over two CLINT registers
        snrt_int_clint_set(basehart / 32, WAKE_MASK << (basehart % 32));
        snrt_int_clint_set(basehart / 32 + 1,
                           WAKE_MASK >> (32 - basehart % 32));
    } else {
        snrt_int_clint_set(basehart / 32, WAKE_MASK << (basehart % 32));
    }
    const uint32_t mask = OMPSTATIC_NUMTHREADS - 1;
#else

    // wake all worker cores except the main thread
    uint32_t numcores = snrt_cluster_compute_core_num(),
             basehart = snrt_cluster_core_base_hartid();
    uint32_t mask = 0, hart = 1;
    for (; hart < numcores; ++hart) {
        mask |= 1 << (basehart + hart);
        if ((basehart + hart + 1) % 32 == 0) {
            snrt_int_clint_set((basehart + hart) / 32, mask);
            mask = 0;
        }
    }
    if (mask) snrt_int_clint_set((basehart + hart) / 32, mask);
#endif
}

static inline void worker_wfi(uint32_t cluster_core_idx) {
    __atomic_add_fetch(&eu_p->workers_wfi, 1, __ATOMIC_RELAXED);
    snrt_int_sw_poll();
    __atomic_add_fetch(&eu_p->workers_wfi, -1, __ATOMIC_RELAXED);
}

#else  // #ifdef EU_USE_GLOBAL_CLINT

static inline void wake_workers(void) {
    // Guard to wake only if all workers are in WFI
    wait_worker_wfi();
    // Wake the cluster cores. We do this with cluster-relative hart IDs and do
    // not wake hart 0 since this is the main thread
    uint32_t numcores = snrt_cluster_compute_core_num();
    snrt_int_cluster_set(~0x1 & ((1 << numcores) - 1));
}
static inline void worker_wfi(uint32_t cluster_core_idx) {
    __atomic_add_fetch(&eu_p->workers_wfi, 1, __ATOMIC_RELAXED);
    snrt_wfi();
    snrt_int_cluster_clr(1 << cluster_core_idx);
    __atomic_add_fetch(&eu_p->workers_wfi, -1, __ATOMIC_RELAXED);
}

#endif  // #ifdef EU_USE_GLOBAL_CLINT

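/**
 * @brief Print the current number of workers in the event loop (only when
 * EU_DEBUG_LEVEL is defined).
 */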
inline void eu_print_status() {
    EU_PRINTF(0, "workers_in_loop=%d\n", eu_p->workers_in_loop);
}

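/**
 * @brief Acquire the event unit's workers_mutex.
 */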
inline void eu_mutex_lock() { snrt_mutex_acquire(&eu_p->workers_mutex); }

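/**
 * @brief Release the event unit's workers_mutex.
 */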
inline void eu_mutex_release() { snrt_mutex_release(&eu_p->workers_mutex); }

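/**
 * @brief Return the number of workers currently inside the event loop.
 */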
inline uint32_t eu_get_workers_in_loop() {
    return __atomic_load_n(&eu_p->workers_in_loop, __ATOMIC_RELAXED);
}
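/**
 * @brief Return the number of workers currently waiting in WFI.
 */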
inline uint32_t eu_get_workers_in_wfi() {
    return __atomic_load_n(&eu_p->workers_wfi, __ATOMIC_RELAXED);
}

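/**
 * @brief Initialize the event unit. The core with cluster index 0 allocates
 * and zeroes the EU struct in L1 and publishes it through eu_p_global; all
 * other cores wait for the pointer and copy it into their thread-local eu_p.
 */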
inline void eu_init(void) {
    if (snrt_cluster_core_idx() == 0) {
        // Allocate the eu struct in L1 for fast access
        eu_p = (eu_t *)snrt_l1_alloc(sizeof(eu_t));
        memset((void *)eu_p, 0, sizeof(eu_t));
        // store a copy of eu_p in shared memory
        eu_p_global = eu_p;
    } else {
        while (!eu_p_global)
            ;
        eu_p = eu_p_global;
    }
}

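/**
 * @brief Exit the event unit: make sure no task is pending, then set the exit
 * flag and wake all workers so they return from eu_event_loop().
 *
 * @param core_idx cluster-local core index of the calling (main) core
 */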
inline void eu_exit(uint32_t core_idx) {
    // make sure queue is empty
    if (!eu_p->e.nthreads) eu_run_empty(core_idx);
    // set exit flag and wake cores
    wait_worker_wfi();
    eu_p->exit_flag = 1;
    wake_workers();
}

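/**
 * @brief Main loop for worker cores: sleep in WFI until woken, execute the
 * dispatched task if this core is part of the requested team, and repeat
 * until the exit flag is set.
 *
 * @param cluster_core_idx cluster-local core index of the calling worker
 */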
inline void eu_event_loop(uint32_t cluster_core_idx) {
    uint32_t scratch;
    uint32_t nthds;

    // count number of workers in loop
    __atomic_add_fetch(&eu_p->workers_in_loop, 1, __ATOMIC_RELAXED);

    // enable software interrupts
#ifdef EU_USE_GLOBAL_CLINT
    snrt_interrupt_enable(IRQ_M_SOFT);
#else
    snrt_interrupt_enable(IRQ_M_CLUSTER);
#endif

    EU_PRINTF(0, "#%d entered event loop\n", cluster_core_idx);

    while (1) {
        // check for exit
        if (eu_p->exit_flag) {
#ifdef EU_USE_GLOBAL_CLINT
            snrt_interrupt_disable(IRQ_M_SOFT);
#else
            // TODO colluca: should this be "disable"?
            snrt_interrupt_enable(IRQ_M_CLUSTER);
#endif
            return;
        }

        if (cluster_core_idx < eu_p->e.nthreads) {
            // make a local copy of nthreads to sync after the work, since the
            // master hart resets eu_p->e.nthreads as soon as all workers have
            // finished, which might cause a race condition
            nthds = eu_p->e.nthreads;
            EU_PRINTF(0, "run fn @ %#x (arg 0 = %#x)\n", eu_p->e.fn,
                      ((uint32_t *)eu_p->e.data)[0]);
            // call
            eu_p->e.fn(eu_p->e.data, eu_p->e.argc);
        }

        // enter wait for interrupt
        __atomic_add_fetch(&eu_p->e.fini_count, 1, __ATOMIC_RELAXED);
        worker_wfi(cluster_core_idx);
    }
}

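/**
 * @brief Publish a task to the event unit. Waits until all workers are in WFI
 * before overwriting the event struct, then fills in the function pointer,
 * argument data, argument count, and the number of threads that should run it.
 *
 * @param fn       task function to execute
 * @param argc     number of arguments pointed to by data
 * @param data     pointer to the task's argument data
 * @param nthreads number of threads that should execute the task
 * @return 0
 */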
inline int eu_dispatch_push(void (*fn)(void *, uint32_t), uint32_t argc,
                            void *data, uint32_t nthreads) {
    // wait for workers to be in WFI before manipulating the event struct
    wait_worker_wfi();

    // fill queue
    eu_p->e.fn = fn;
    eu_p->e.data = data;
    eu_p->e.argc = argc;
    eu_p->e.nthreads = nthreads;

    EU_PRINTF(10, "eu_dispatch_push success, workers %d in loop %d\n", nthreads,
              eu_p->workers_in_loop);

    return 0;
}

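/**
 * @brief Run the currently dispatched task to completion: wake the workers if
 * more than one thread is requested, take part in the task if the calling core
 * belongs to the team, wait for all workers to check in, and finally clear
 * nthreads so the task is not re-executed.
 *
 * @param core_idx cluster-local core index of the calling (main) core
 */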
inline void eu_run_empty(uint32_t core_idx) {
    unsigned nfini, scratch;
    scratch = eu_p->e.nthreads;
    if (!scratch) return;
    EU_PRINTF(10, "eu_run_empty enter: q size %d\n", eu_p->e.nthreads);

    eu_p->e.fini_count = 0;
    if (scratch > 1) wake_workers();

    // Am I also part of the team?
    if (core_idx < eu_p->e.nthreads) {
        // call
        EU_PRINTF(0, "run fn @ %#x (arg 0 = %#x)\n", eu_p->e.fn,
                  ((uint32_t *)eu_p->e.data)[0]);
        eu_p->e.fn(eu_p->e.data, eu_p->e.argc);
    }

    // wait for queue to be empty
    if (scratch > 1) {
        scratch = eu_get_workers_in_loop();
        while (__atomic_load_n(&eu_p->e.fini_count, __ATOMIC_RELAXED) !=
               scratch)
            ;
    }

    // stop workers from re-executing the task
    eu_p->e.nthreads = 0;

    EU_PRINTF(10, "eu_run_empty exit\n");
}

#endif /* EU_H */
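
For orientation, the following is a minimal usage sketch of this API, not part of eu.h itself: cluster core 0 acts as the main thread that dispatches a task and tears the event unit down, while the other compute cores park in the event loop. The task function example_task, its argument, and the team size of 8 threads are illustrative assumptions, and snrt.h plus eu.h are assumed to be included.

// Hypothetical task: every participating thread receives the same data
// pointer and argument count.
static void example_task(void *data, uint32_t argc) {
    uint32_t *args = (uint32_t *)data;
    // ... per-thread work based on args[0] ...
}

void example_main(void) {
    uint32_t core_idx = snrt_cluster_core_idx();

    // All cluster cores initialize the event unit; core 0 allocates the
    // struct, the others wait for eu_p_global and copy the pointer.
    eu_init();

    if (core_idx == 0) {
        // Main thread: publish a task for 8 threads and run it to completion.
        uint32_t arg = 42;
        eu_dispatch_push(example_task, 1, &arg, 8);
        eu_run_empty(core_idx);
        // Set the exit flag and wake the workers so they leave the loop.
        eu_exit(core_idx);
    } else {
        // Worker cores: sleep in WFI and execute dispatched tasks until
        // eu_exit() is called by the main thread.
        eu_event_loop(core_idx);
    }
}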