Snitch Runtime
eu.h
// Copyright 2021 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

#ifndef EU_H
#define EU_H

#include "eu_decls.h"

//================================================================================
// Settings
//================================================================================

// Define to wake worker cores through the global CLINT instead of the
// cluster-local interrupt controller
// #define EU_USE_GLOBAL_CLINT

//================================================================================
// Debug
//================================================================================

#ifdef EU_DEBUG_LEVEL
#include "printf.h"
#define _EU_PRINTF(...)              \
    if (1) {                         \
        printf("[eu] " __VA_ARGS__); \
    }
#define EU_PRINTF(d, ...)        \
    if (EU_DEBUG_LEVEL >= d) {   \
        _EU_PRINTF(__VA_ARGS__); \
    }
#else
#define EU_PRINTF(d, ...)
#endif

//================================================================================
// Data
//================================================================================

// Thread-local pointer to this cluster's event-unit state. Set by eu_init()
// on the main core and copied from eu_p_global on all other cores.
extern __thread volatile eu_t *eu_p;

// Globally visible copy of the event-unit pointer, published by the main core
// in eu_init() so the other cores can initialize their thread-local eu_p.
extern volatile eu_t *volatile eu_p_global;

//================================================================================
// Functions
//================================================================================

// Spin until every worker that entered the event loop is waiting in WFI
inline void wait_worker_wfi(void) {
    uint32_t scratch = eu_p->workers_in_loop;
    while (__atomic_load_n(&eu_p->workers_wfi, __ATOMIC_RELAXED) != scratch)
        ;
}

#ifdef EU_USE_GLOBAL_CLINT

// Wake all worker cores of the cluster through the global CLINT
inline void wake_workers(void) {
#ifdef OMPSTATIC_NUMTHREADS
#define WAKE_MASK (((1 << OMPSTATIC_NUMTHREADS) - 1) & ~0x1)
    // Fast wake-up for a static number of worker threads
    uint32_t basehart = snrt_cluster_core_base_hartid();
    if ((basehart % 32) + OMPSTATIC_NUMTHREADS > 32) {
        // wake-up is split over two CLINT registers
        snrt_int_clint_set(basehart / 32, WAKE_MASK << (basehart % 32));
        snrt_int_clint_set(basehart / 32 + 1,
                           WAKE_MASK >> (32 - basehart % 32));
    } else {
        snrt_int_clint_set(basehart / 32, WAKE_MASK << (basehart % 32));
    }
    const uint32_t mask = OMPSTATIC_NUMTHREADS - 1;
#else

    // wake all worker cores except the main thread
    uint32_t numcores = snrt_cluster_compute_core_num(),
             basehart = snrt_cluster_core_base_hartid();
    uint32_t mask = 0, hart = 1;
    for (; hart < numcores; ++hart) {
        mask |= 1 << (basehart + hart);
        if ((basehart + hart + 1) % 32 == 0) {
            snrt_int_clint_set((basehart + hart) / 32, mask);
            mask = 0;
        }
    }
    if (mask) snrt_int_clint_set((basehart + hart) / 32, mask);
#endif
}
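
// Illustrative worked example of the static wake-up mask above (the concrete
// values are assumed, not taken from the source): with
// OMPSTATIC_NUMTHREADS = 16 and a cluster base hartid of 24,
//   WAKE_MASK = ((1 << 16) - 1) & ~0x1 = 0xFFFE          (bits 1..15 set)
//   (24 % 32) + 16 = 40 > 32, so the wake-up spans two CLINT registers:
//     snrt_int_clint_set(0, 0xFFFE << 24)  -> wakes harts 25..31
//     snrt_int_clint_set(1, 0xFFFE >> 8)   -> wakes harts 32..39
//   Hart 24 is not signalled because bit 0 of WAKE_MASK is cleared: it is the
//   main thread issuing the wake-up.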

// Count this worker as waiting, then poll for a software interrupt
inline void worker_wfi(uint32_t cluster_core_idx) {
    __atomic_add_fetch(&eu_p->workers_wfi, 1, __ATOMIC_RELAXED);
    snrt_int_sw_poll();
    __atomic_add_fetch(&eu_p->workers_wfi, -1, __ATOMIC_RELAXED);
}

#else  // #ifdef EU_USE_GLOBAL_CLINT

inline void wake_workers(void) {
    // Guard: wake only once all workers are in WFI
    wait_worker_wfi();
    // Wake the cluster cores. We use cluster-relative hart IDs and do not
    // wake hart 0 since it is the main thread
    uint32_t numcores = snrt_cluster_compute_core_num();
    snrt_int_cluster_set(~0x1 & ((1 << numcores) - 1));
}

inline void worker_wfi(uint32_t cluster_core_idx) {
    __atomic_add_fetch(&eu_p->workers_wfi, 1, __ATOMIC_RELAXED);
    snrt_wfi();
    snrt_int_cluster_clr(1 << cluster_core_idx);
    __atomic_add_fetch(&eu_p->workers_wfi, -1, __ATOMIC_RELAXED);
}

#endif  // #ifdef EU_USE_GLOBAL_CLINT

// Print the number of workers currently in the event loop (debug aid)
inline void eu_print_status() {
    EU_PRINTF(0, "workers_in_loop=%d\n", eu_p->workers_in_loop);
}

// Acquire the event unit's mutex
inline void eu_mutex_lock() { snrt_mutex_acquire(&eu_p->workers_mutex); }

// Release the event unit's mutex
inline void eu_mutex_release() { snrt_mutex_release(&eu_p->workers_mutex); }

// Number of workers currently in the event loop
inline uint32_t eu_get_workers_in_loop() {
    return __atomic_load_n(&eu_p->workers_in_loop, __ATOMIC_RELAXED);
}

// Number of workers currently waiting in WFI
inline uint32_t eu_get_workers_in_wfi() {
    return __atomic_load_n(&eu_p->workers_wfi, __ATOMIC_RELAXED);
}

// Initialize the event unit. Called by all cluster cores: core 0 allocates
// and zeroes the eu_t struct, the other cores wait for the shared pointer to
// be published and copy it into their thread-local eu_p.
inline void eu_init(void) {
    if (snrt_cluster_core_idx() == 0) {
        // Allocate the eu struct in L1 for fast access
        eu_p = snrt_l1_alloc(sizeof(eu_t));
        snrt_memset((void *)eu_p, 0, sizeof(eu_t));
        // store a copy of eu_p in shared memory
        eu_p_global = eu_p;
    } else {
        while (!eu_p_global)
            ;
        eu_p = eu_p_global;
    }
}

// Shut down the event unit: set the exit flag and wake all workers so they
// return from eu_event_loop()
inline void eu_exit(uint32_t core_idx) {
    // make sure queue is empty
    if (!eu_p->e.nthreads) eu_run_empty(core_idx);
    // set exit flag and wake cores
    wait_worker_wfi();
    eu_p->exit_flag = 1;
    wake_workers();
}

// Worker event loop: execute dispatched work until the exit flag is set.
// Called by all worker cores with their cluster-relative core index.
inline void eu_event_loop(uint32_t cluster_core_idx) {
    uint32_t scratch;
    uint32_t nthds;

    // count number of workers in loop
    __atomic_add_fetch(&eu_p->workers_in_loop, 1, __ATOMIC_RELAXED);

    // enable software interrupts
#ifdef EU_USE_GLOBAL_CLINT
    snrt_interrupt_enable(IRQ_M_SOFT);
#else
    snrt_interrupt_enable(IRQ_M_CLUSTER);
#endif

    EU_PRINTF(0, "#%d entered event loop\n", cluster_core_idx);

    while (1) {
        // check for exit
        if (eu_p->exit_flag) {
#ifdef EU_USE_GLOBAL_CLINT
            snrt_interrupt_disable(IRQ_M_SOFT);
#else
            snrt_interrupt_disable(IRQ_M_CLUSTER);
#endif
            return;
        }

        if (cluster_core_idx < eu_p->e.nthreads) {
            // make a local copy of nthreads to sync after the work, since the
            // master hart resets eu_p->e.nthreads as soon as all workers have
            // finished, which could otherwise cause a race condition
            nthds = eu_p->e.nthreads;
            EU_PRINTF(0, "run fn @ %#x (arg 0 = %#x)\n", eu_p->e.fn,
                      ((uint32_t *)eu_p->e.data)[0]);
            // call
            eu_p->e.fn(eu_p->e.data, eu_p->e.argc);
        }

        // enter wait for interrupt
        __atomic_add_fetch(&eu_p->e.fini_count, 1, __ATOMIC_RELAXED);
        worker_wfi(cluster_core_idx);
    }
}

// Publish a task to the event unit: fn is called with (data, argc) by the
// first nthreads cores of the cluster. Returns 0 on success.
inline int eu_dispatch_push(void (*fn)(void *, uint32_t), uint32_t argc,
                            void *data, uint32_t nthreads) {
    // wait for workers to be in wfi before manipulating the event struct
    wait_worker_wfi();

    // fill queue
    eu_p->e.fn = fn;
    eu_p->e.data = data;
    eu_p->e.argc = argc;
    eu_p->e.nthreads = nthreads;

    EU_PRINTF(10, "eu_dispatch_push success: nthreads=%d, workers in loop=%d\n",
              nthreads, eu_p->workers_in_loop);

    return 0;
}

// Execute the pending task: wake the workers, run the task on the calling
// core if it is part of the team, and wait until all workers have finished.
inline void eu_run_empty(uint32_t core_idx) {
    unsigned nfini, scratch;
    scratch = eu_p->e.nthreads;
    if (!scratch) return;
    EU_PRINTF(10, "eu_run_empty enter: q size %d\n", eu_p->e.nthreads);

    eu_p->e.fini_count = 0;
    if (scratch > 1) wake_workers();

    // Am I also part of the team?
    if (core_idx < eu_p->e.nthreads) {
        // call
        EU_PRINTF(0, "run fn @ %#x (arg 0 = %#x)\n", eu_p->e.fn,
                  ((uint32_t *)eu_p->e.data)[0]);
        eu_p->e.fn(eu_p->e.data, eu_p->e.argc);
    }

    // wait for all workers in the loop to report completion
    if (scratch > 1) {
        scratch = eu_get_workers_in_loop();
        while (__atomic_load_n(&eu_p->e.fini_count, __ATOMIC_RELAXED) !=
               scratch)
            ;
    }

    // stop workers from re-executing the task
    eu_p->e.nthreads = 0;

    EU_PRINTF(10, "eu_run_empty exit\n");
}

#endif /* EU_H */
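
A minimal usage sketch of this API (not part of eu.h): core 0 acts as the main thread that dispatches work, while the remaining compute cores park in the event loop. The function names work_fn and example, the argument value, and the exact sequencing are illustrative assumptions, not code from the Snitch runtime.

#include "eu.h"

// Hypothetical worker function; its signature matches eu_dispatch_push's fn
static void work_fn(void *data, uint32_t argc) {
    uint32_t *args = (uint32_t *)data;
    // ... perform the parallel work using args[0 .. argc-1] ...
    (void)args;
    (void)argc;
}

// Hypothetical per-core entry point after snrt startup
void example(void) {
    uint32_t core_idx = snrt_cluster_core_idx();
    uint32_t nthreads = snrt_cluster_compute_core_num();

    // all cores call eu_init(): core 0 allocates the eu_t struct in L1,
    // the other cores pick up the pointer from eu_p_global
    eu_init();

    if (core_idx == 0) {
        static uint32_t arg = 42;
        // publish the task for the whole team and run it to completion
        eu_dispatch_push(work_fn, 1, &arg, nthreads);
        eu_run_empty(core_idx);
        // release the workers from eu_event_loop()
        eu_exit(core_idx);
    } else {
        // workers execute dispatched tasks until eu_exit() is called
        eu_event_loop(core_idx);
    }
}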