#ifdef EU_DEBUG_LEVEL
#define _EU_PRINTF(...) \
    printf("[eu] " __VA_ARGS__);
#define EU_PRINTF(d, ...)        \
    if (EU_DEBUG_LEVEL >= d) {   \
        _EU_PRINTF(__VA_ARGS__); \
    }
#else
#define EU_PRINTF(d, ...)
#endif
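
// Usage note: EU_PRINTF(d, ...) prints only when the compile-time debug
// level is at least d, e.g. EU_PRINTF(10, ...) requires EU_DEBUG_LEVEL to
// be defined as 10 or higher (passing it on the compiler command line is
// an assumption; the macro only requires the symbol to be defined). When
// EU_DEBUG_LEVEL is undefined, all EU_PRINTF calls compile to nothing.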

// Thread-local pointer to the event unit struct; set by eu_init() on the
// main thread and picked up by workers before they enter the event loop
extern __thread volatile eu_t *eu_p;

// Global copy of eu_p in shared memory, published by eu_init() so the
// worker cores can locate the struct
extern volatile eu_t *volatile eu_p_global;

// Spin until every worker currently counted in the event loop has entered
// wait-for-interrupt (WFI)
inline void wait_worker_wfi(void) {
    uint32_t scratch = eu_p->workers_in_loop;
    while (__atomic_load_n(&eu_p->workers_wfi, __ATOMIC_RELAXED) != scratch)
        ;
}

#ifdef EU_USE_GLOBAL_CLINT

// Wake all workers from WFI via the global CLINT software interrupt
inline void wake_workers(void) {
#ifdef OMPSTATIC_NUMTHREADS
#define WAKE_MASK (((1 << OMPSTATIC_NUMTHREADS) - 1) & ~0x1)
    // Fast path for a static thread count. Bit 0 of WAKE_MASK is cleared so
    // the main thread is not woken.
    uint32_t basehart = snrt_cluster_core_base_hartid();
    if ((basehart % 32) + OMPSTATIC_NUMTHREADS > 32) {
        // The wake-up mask straddles two 32-bit CLINT registers
        snrt_int_clint_set(basehart / 32, WAKE_MASK << (basehart % 32));
        snrt_int_clint_set(basehart / 32 + 1,
                           WAKE_MASK >> (32 - basehart % 32));
    } else {
        snrt_int_clint_set(basehart / 32, WAKE_MASK << (basehart % 32));
    }
    const uint32_t mask = OMPSTATIC_NUMTHREADS - 1;
#else
    // Dynamic thread count: accumulate one bit per worker (hart 0, the main
    // thread, is skipped) and flush the mask at every 32-bit register boundary
    uint32_t numcores = snrt_cluster_compute_core_num(),
             basehart = snrt_cluster_core_base_hartid();
    uint32_t mask = 0, hart = 1;
    for (; hart < numcores; ++hart) {
        mask |= 1 << (basehart + hart);
        if ((basehart + hart + 1) % 32 == 0) {
            snrt_int_clint_set((basehart + hart) / 32, mask);
            mask = 0;
        }
    }
    if (mask) snrt_int_clint_set((basehart + hart) / 32, mask);
#endif
}
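
// Worked example with hypothetical numbers: for OMPSTATIC_NUMTHREADS = 8,
// WAKE_MASK = 0xFE (workers 1-7). With basehart = 30 the worker harts are
// 31..37, which straddle a register boundary: (30 % 32) + 8 = 38 > 32, so
// register 0 receives 0xFE << 30 = 0x80000000 (hart 31) and register 1
// receives 0xFE >> 2 = 0x3F (harts 32-37).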

// Enter WFI and keep the workers_wfi count accurate; after wake-up, clear
// the pending CLINT software interrupt for this hart (assumed wake/clear
// sequence)
inline void worker_wfi(uint32_t cluster_core_idx) {
    __atomic_add_fetch(&eu_p->workers_wfi, 1, __ATOMIC_RELAXED);
    snrt_wfi();
    snrt_int_sw_clear(snrt_hartid());
    __atomic_add_fetch(&eu_p->workers_wfi, -1, __ATOMIC_RELAXED);
}

#else  // EU_USE_GLOBAL_CLINT

// Wake all workers from WFI via the cluster-local interrupt. Waits for all
// workers to reach WFI first so no wake-up is lost.
inline void wake_workers(void) {
    wait_worker_wfi();
    // Set one interrupt bit per compute core, excluding hart 0 (main thread)
    uint32_t numcores = snrt_cluster_compute_core_num();
    snrt_int_cluster_set(~0x1 & ((1 << numcores) - 1));
}

// Enter WFI and keep the workers_wfi count accurate; after wake-up, clear
// this core's bit in the cluster interrupt register
inline void worker_wfi(uint32_t cluster_core_idx) {
    __atomic_add_fetch(&eu_p->workers_wfi, 1, __ATOMIC_RELAXED);
    snrt_wfi();
    snrt_int_cluster_clr(1 << cluster_core_idx);
    __atomic_add_fetch(&eu_p->workers_wfi, -1, __ATOMIC_RELAXED);
}

#endif  // EU_USE_GLOBAL_CLINT

// Print event unit status
inline void eu_print_status() {
    EU_PRINTF(0, "workers_in_loop=%d\n", eu_p->workers_in_loop);
}

// Acquire the event unit mutex; returns only on success
inline void eu_mutex_lock() { snrt_mutex_acquire(&eu_p->workers_mutex); }

// Release the event unit mutex
inline void eu_mutex_release() { snrt_mutex_release(&eu_p->workers_mutex); }

// Number of workers currently in the event loop
inline uint32_t eu_get_workers_in_loop() {
    return __atomic_load_n(&eu_p->workers_in_loop, __ATOMIC_RELAXED);
}

// Number of workers currently in WFI
inline uint32_t eu_get_workers_in_wfi() {
    return __atomic_load_n(&eu_p->workers_wfi, __ATOMIC_RELAXED);
}

// Initialize the event unit. Core 0 allocates and zeroes the struct in L1
// and publishes the pointer; the other cores wait for it and copy it into
// their thread-local eu_p.
inline void eu_init(void) {
    if (snrt_cluster_core_idx() == 0) {
        // Allocate the eu struct in L1 for fast access
        eu_p = snrt_l1_alloc(sizeof(eu_t));
        snrt_memset((void *)eu_p, 0, sizeof(eu_t));
        // Publish the pointer for the worker cores
        eu_p_global = eu_p;
    } else {
        while (!eu_p_global)
            ;
        eu_p = eu_p_global;
    }
}

// Send all workers in the event loop to exit
inline void eu_exit(uint32_t core_idx) {
    // Make sure the queue is empty
    if (!eu_p->e.nthreads) eu_run_empty(core_idx);
    // Set the exit flag and wake all workers
    wait_worker_wfi();
    eu_p->exit_flag = 1;
    wake_workers();
}

// Worker event loop; returns only once the exit flag is set
inline void eu_event_loop(uint32_t cluster_core_idx) {
    uint32_t nthds;

    // Count this worker into the loop
    __atomic_add_fetch(&eu_p->workers_in_loop, 1, __ATOMIC_RELAXED);

    // Enable software interrupts
#ifdef EU_USE_GLOBAL_CLINT
    snrt_interrupt_enable(IRQ_M_SOFT);
#else
    snrt_interrupt_enable(IRQ_M_CLUSTER);
#endif

    EU_PRINTF(0, "#%d entered event loop\n", cluster_core_idx);

    while (1) {
        // Check for exit
        if (eu_p->exit_flag) {
#ifdef EU_USE_GLOBAL_CLINT
            snrt_interrupt_disable(IRQ_M_SOFT);
#else
            snrt_interrupt_disable(IRQ_M_CLUSTER);
#endif
            return;
        }

        // Run the task if this worker is part of the team
        if (cluster_core_idx < eu_p->e.nthreads) {
            // Local copy of nthreads: the main thread may already be
            // preparing the next dispatch while workers finish
            nthds = eu_p->e.nthreads;
            EU_PRINTF(0, "run fn @ %#x (arg 0 = %#x)\n", eu_p->e.fn,
                      ((uint32_t *)eu_p->e.data)[0]);
            eu_p->e.fn(eu_p->e.data, eu_p->e.argc);
        }

        // Signal completion and go back to sleep
        __atomic_add_fetch(&eu_p->e.fini_count, 1, __ATOMIC_RELAXED);
        worker_wfi(cluster_core_idx);
    }
}

// Queue a task for execution by nthreads cores. Returns 0 on success.
inline int eu_dispatch_push(void (*fn)(void *, uint32_t), uint32_t argc,
                            void *data, uint32_t nthreads) {
    // Wait for all workers to be in WFI before touching the event struct
    wait_worker_wfi();

    // Fill the queue
    eu_p->e.fn = fn;
    eu_p->e.data = data;
    eu_p->e.argc = argc;
    eu_p->e.nthreads = nthreads;

    EU_PRINTF(10, "eu_dispatch_push success, workers %d in loop %d\n", nthreads,
              eu_p->workers_in_loop);

    return 0;
}

// Run the queued task to completion: wake the workers, participate if part
// of the team, then wait until every worker has reported completion
inline void eu_run_empty(uint32_t core_idx) {
    unsigned scratch = eu_p->e.nthreads;
    if (!scratch) return;
    EU_PRINTF(10, "eu_run_empty enter: q size %d\n", eu_p->e.nthreads);

    eu_p->e.fini_count = 0;
    if (scratch > 1) wake_workers();

    // Is the main thread part of the team?
    if (core_idx < eu_p->e.nthreads) {
        EU_PRINTF(0, "run fn @ %#x (arg 0 = %#x)\n", eu_p->e.fn,
                  ((uint32_t *)eu_p->e.data)[0]);
        eu_p->e.fn(eu_p->e.data, eu_p->e.argc);
    }

    // Wait until all workers in the loop have finished the task
    if (scratch > 1) {
        scratch = eu_get_workers_in_loop();
        while (__atomic_load_n(&eu_p->e.fini_count, __ATOMIC_RELAXED) !=
               scratch)
            ;
    }

    // Stop workers from re-executing the task
    eu_p->e.nthreads = 0;

    EU_PRINTF(10, "eu_run_empty exit\n");
}