#ifdef EU_DEBUG_LEVEL
#define _EU_PRINTF(...)              \
    do {                             \
        printf("[eu] " __VA_ARGS__); \
    } while (0)
#define EU_PRINTF(d, ...)            \
    do {                             \
        if (EU_DEBUG_LEVEL >= d) {   \
            _EU_PRINTF(__VA_ARGS__); \
        }                            \
    } while (0)
#else
#define EU_PRINTF(d, ...)
#endif
// Per-thread pointer to the event unit struct. Valid only after eu_init()
// on the main thread and after entering eu_event_loop() on worker threads.
extern __thread volatile eu_t *eu_p;

// Shared copy of eu_p through which the initializing core publishes the
// event unit to all other cores.
extern volatile eu_t *volatile eu_p_global;
// Spin until every worker that entered the event loop is sitting in WFI,
// i.e. until it is safe to modify the shared event struct.
static inline void wait_worker_wfi(void) {
    uint32_t scratch = eu_p->workers_in_loop;
    while (__atomic_load_n(&eu_p->workers_wfi, __ATOMIC_RELAXED) != scratch)
        ;
}
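// Worked example (hypothetical): in a 9-core cluster where core 0 stays the
// main thread and cores 1..8 enter eu_event_loop(), workers_in_loop settles
// at 8; wait_worker_wfi() then spins until workers_wfi also reaches 8, i.e.
// until no worker can still be reading the previous event.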
#ifdef EU_USE_GLOBAL_CLINT

// Wake all workers through the global CLINT software interrupt.
static inline void wake_workers(void) {
#ifdef OMPSTATIC_NUMTHREADS
// All statically configured threads except the main thread (bit 0)
#define WAKE_MASK (((1 << OMPSTATIC_NUMTHREADS) - 1) & ~0x1)
    // Fast wake-up for a static number of worker threads
    uint32_t basehart = snrt_cluster_core_base_hartid();
    if ((basehart % 32) + OMPSTATIC_NUMTHREADS > 32) {
        // The wake mask spans two 32-bit CLINT registers
        snrt_int_clint_set(basehart / 32, WAKE_MASK << (basehart % 32));
        snrt_int_clint_set(basehart / 32 + 1,
                           WAKE_MASK >> (32 - basehart % 32));
    } else {
        snrt_int_clint_set(basehart / 32, WAKE_MASK << (basehart % 32));
    }
#else
    // Dynamic case: wake all compute cores except the main thread, flushing
    // the accumulated mask at every 32-bit CLINT register boundary
    uint32_t numcores = snrt_cluster_compute_core_num(),
             basehart = snrt_cluster_core_base_hartid();
    uint32_t mask = 0, hart = 1;
    for (; hart < numcores; ++hart) {
        mask |= 1 << ((basehart + hart) % 32);
        if ((basehart + hart + 1) % 32 == 0) {
            snrt_int_clint_set((basehart + hart) / 32, mask);
            mask = 0;
        }
    }
    if (mask) snrt_int_clint_set((basehart + hart) / 32, mask);
#endif
}
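// Worked example (hypothetical values): with basehart = 24 and
// OMPSTATIC_NUMTHREADS = 16, WAKE_MASK = 0xFFFE and (24 % 32) + 16 = 40 > 32,
// so the wake bits span two CLINT registers: register 0 receives
// 0xFFFE << 24, waking harts 25..31 (hart 24 is the main thread and is
// masked out), and register 1 receives 0xFFFE >> 8, waking harts 32..39.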
static inline void worker_wfi(uint32_t cluster_core_idx) {
    __atomic_add_fetch(&eu_p->workers_wfi, 1, __ATOMIC_RELAXED);
    // Sleep until the CLINT software interrupt fires, then clear it
    snrt_int_sw_poll();
    __atomic_add_fetch(&eu_p->workers_wfi, -1, __ATOMIC_RELAXED);
}

#else  // EU_USE_GLOBAL_CLINT
// Wake all workers using the cluster-local interrupt.
static inline void wake_workers(void) {
    // Only wake once every worker is actually in WFI
    wait_worker_wfi();
    // Set the cluster-local interrupt for all compute cores except core 0,
    // which is the main thread
    uint32_t numcores = snrt_cluster_compute_core_num();
    snrt_int_cluster_set(~0x1 & ((1 << numcores) - 1));
}
static inline void worker_wfi(uint32_t cluster_core_idx) {
    __atomic_add_fetch(&eu_p->workers_wfi, 1, __ATOMIC_RELAXED);
    snrt_wfi();
    snrt_int_cluster_clr(1 << cluster_core_idx);
    __atomic_add_fetch(&eu_p->workers_wfi, -1, __ATOMIC_RELAXED);
}

#endif  // EU_USE_GLOBAL_CLINT
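// Note on the handshake above: a worker advertises itself as sleeping
// (workers_wfi++) before it executes wfi, and wake_workers() spins on that
// same counter, so the wake interrupt is only raised once every worker is
// committed to sleeping; RISC-V wfi is also allowed to fall through when an
// interrupt is already pending, so this ordering does not lose wake-ups.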
inline void eu_print_status() {
    EU_PRINTF(0, "workers_in_loop=%d\n", eu_p->workers_in_loop);
}
inline void eu_mutex_lock() { snrt_mutex_acquire(&eu_p->workers_mutex); }
inline void eu_mutex_release() { snrt_mutex_release(&eu_p->workers_mutex); }
inline uint32_t eu_get_workers_in_loop() {
    return __atomic_load_n(&eu_p->workers_in_loop, __ATOMIC_RELAXED);
}

inline uint32_t eu_get_workers_in_wfi() {
    return __atomic_load_n(&eu_p->workers_wfi, __ATOMIC_RELAXED);
}
inline void eu_init(void) {
    if (snrt_cluster_core_idx() == 0) {
        // Allocate the eu struct in L1 for fast access and zero it
        eu_p = (eu_t *)snrt_l1_alloc(sizeof(eu_t));
        memset((void *)eu_p, 0, sizeof(eu_t));
        // Publish the pointer so the other cores can pick it up
        eu_p_global = eu_p;
    } else {
        // Wait until core 0 has published the event unit
        while (!eu_p_global)
            ;
        eu_p = eu_p_global;
    }
}
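// Usage sketch (illustrative; the SPMD entry point and the core split are
// assumptions about the caller, not part of this file):
//
//   eu_init();  // called by all cluster cores
//   if (snrt_cluster_core_idx() != 0) {
//       // workers stay here until eu_exit() is called
//       eu_event_loop(snrt_cluster_core_idx());
//   }
//   // core 0 continues as the main thread and dispatches work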
// Send all workers in the event loop to exit.
inline void eu_exit(uint32_t core_idx) {
    // Make sure the queue is drained before shutting down
    if (eu_p->e.nthreads) eu_run_empty(core_idx);
    // Set the exit flag and wake the workers one last time so they see it
    wait_worker_wfi();
    eu_p->exit_flag = 1;
    wake_workers();
}
// Worker event loop. Returns only once the exit flag is set.
inline void eu_event_loop(uint32_t cluster_core_idx) {
    uint32_t nthds;

    // Count this core as a worker in the loop
    __atomic_add_fetch(&eu_p->workers_in_loop, 1, __ATOMIC_RELAXED);

    // Enable the wake-up interrupt source
#ifdef EU_USE_GLOBAL_CLINT
    snrt_interrupt_enable(IRQ_M_SOFT);
#else
    snrt_interrupt_enable(IRQ_M_CLUSTER);
#endif

    EU_PRINTF(0, "#%d entered event loop\n", cluster_core_idx);

    while (1) {
        // Check for exit
        if (eu_p->exit_flag) {
#ifdef EU_USE_GLOBAL_CLINT
            snrt_interrupt_disable(IRQ_M_SOFT);
#else
            snrt_interrupt_disable(IRQ_M_CLUSTER);
#endif
            return;
        }

        // Am I part of the requested team?
        if (cluster_core_idx < eu_p->e.nthreads) {
            // Make a local copy of nthreads since the value in eu_p->e may
            // change as soon as the job is done
            nthds = eu_p->e.nthreads;
            EU_PRINTF(0, "run fn @ %#x (arg 0 = %#x)\n", eu_p->e.fn,
                      ((uint32_t *)eu_p->e.data)[0]);
            eu_p->e.fn(eu_p->e.data, eu_p->e.argc);
        }

        // Signal completion and wait for the next event
        __atomic_add_fetch(&eu_p->e.fini_count, 1, __ATOMIC_RELAXED);
        worker_wfi(cluster_core_idx);
    }
}
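// One dispatch round from a worker's point of view (illustrative):
//   1. main thread: eu_dispatch_push(fn, argc, data, nthreads)
//   2. main thread: eu_run_empty() -> wake_workers()
//   3. worker: returns from worker_wfi(), sees cluster_core_idx < nthreads,
//      runs fn(data, argc)
//   4. worker: increments fini_count and re-enters worker_wfi()
//   5. main thread: observes fini_count == workers_in_loop, resets nthreads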
// Set the function to be executed by `nthreads` threads.
inline int eu_dispatch_push(void (*fn)(void *, uint32_t), uint32_t argc,
                            void *data, uint32_t nthreads) {
    // Wait for all workers to be in WFI before touching the event struct
    wait_worker_wfi();

    // Fill the queue
    eu_p->e.fn = fn;
    eu_p->e.data = data;
    eu_p->e.argc = argc;
    eu_p->e.nthreads = nthreads;

    EU_PRINTF(10, "eu_dispatch_push success, %d workers, %d in loop\n",
              nthreads, eu_p->workers_in_loop);

    return 0;
}
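// Usage sketch for the main thread (illustrative; my_task and my_args are
// placeholders, not part of this API):
//
//   static void my_task(void *data, uint32_t argc) { /* ... */ }
//
//   eu_dispatch_push(my_task, /*argc=*/1, &my_args, /*nthreads=*/4);
//   eu_run_empty(snrt_cluster_core_idx());  // run and wait for completion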
// Run the dispatched task to completion and leave the queue empty.
inline void eu_run_empty(uint32_t core_idx) {
    unsigned scratch = eu_p->e.nthreads;
    if (!scratch) return;
    EU_PRINTF(10, "eu_run_empty enter: q size %d\n", eu_p->e.nthreads);

    eu_p->e.fini_count = 0;
    if (scratch > 1) wake_workers();

    // Am I also part of the team?
    if (core_idx < eu_p->e.nthreads) {
        EU_PRINTF(0, "run fn @ %#x (arg 0 = %#x)\n", eu_p->e.fn,
                  ((uint32_t *)eu_p->e.data)[0]);
        eu_p->e.fn(eu_p->e.data, eu_p->e.argc);
    }

    // Wait until all workers have finished the task
    if (scratch > 1) {
        scratch = eu_get_workers_in_loop();
        while (__atomic_load_n(&eu_p->e.fini_count, __ATOMIC_RELAXED) !=
               scratch)
            ;
    }

    // Stop workers from re-executing the task
    eu_p->e.nthreads = 0;

    EU_PRINTF(10, "eu_run_empty exit\n");
}