#include "../../deps/riscv-opcodes/encoding.h"

#define SNRT_BROADCAST_MASK ((SNRT_CLUSTER_NUM - 1) * SNRT_CLUSTER_OFFSET)
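For illustration, with hypothetical platform parameters (SNRT_CLUSTER_NUM and SNRT_CLUSTER_OFFSET come from the platform configuration, not from this header), the mask selects exactly the address bits in which the cluster address spaces differ:

// Hypothetical values, for illustration only:
//   SNRT_CLUSTER_NUM    = 4
//   SNRT_CLUSTER_OFFSET = 0x40000
// SNRT_BROADCAST_MASK  = (4 - 1) * 0x40000 = 0xC0000,
// i.e. the two address bits that distinguish the four cluster
// address spaces, which is the mask snrt_enable_multicast() expects.
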
inline volatile uint32_t *snrt_mutex() { return &_snrt_mutex; }

42 " amoswap.w.aq t0,t0,(%0) # t0 = oldlock & lock = 1\n"
43 " bnez t0,1b # Retry if previously set)\n"
61 " amoswap.w.aq t0,t0,(%0) # t0 = oldlock & lock = 1\n"
62 " bnez t0,2b # Retry if previously set)\n"
inline void snrt_mutex_release(volatile uint32_t *pmtx) {
    asm volatile(
        "amoswap.w.rl  x0,x0,(%0)   # Release lock by storing 0\n"
        : "+r"(pmtx));
}
inline void snrt_wake_all(uint32_t core_mask) {
#ifdef SNRT_SUPPORTS_MULTICAST
    // Multicast a cluster interrupt to every other cluster's cores.
    // Another cluster's address space must be targeted so the write
    // leaves the cluster: cluster 0 targets cluster 1, all others
    // target cluster 0.
    uintptr_t addr = (uintptr_t)snrt_cluster_clint_set_ptr() -
                     SNRT_CLUSTER_OFFSET * snrt_cluster_idx();
    if (snrt_cluster_idx() == 0) addr += SNRT_CLUSTER_OFFSET;
    snrt_enable_multicast(SNRT_BROADCAST_MASK);
    *((uint32_t *)addr) = core_mask;
    snrt_disable_multicast();
#else
    // Without multicast support, write every other cluster's CLINT in turn
    for (int i = 0; i < snrt_cluster_num(); i++) {
        if (snrt_cluster_idx() != i) {
            void *ptr = snrt_remote_l1_ptr(snrt_cluster_clint_set_ptr(),
                                           snrt_cluster_idx(), i);
            *((uint32_t *)ptr) = core_mask;
        }
    }
#endif
}
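
To make the multicast path concrete, a worked trace under the same hypothetical parameters as above (four clusters spaced 0x40000 apart, CLINT set register at offset 0x20000 within a cluster; all numbers illustrative):

// Hypothetical trace for cluster 2:
//   snrt_cluster_clint_set_ptr() -> 0xA0000 (cluster 2's space)
//   addr = 0xA0000 - 2 * 0x40000 =  0x20000 (cluster 0's space)
// Cluster 0 instead targets cluster 1's alias (addr += SNRT_CLUSTER_OFFSET).
// Storing through a remote cluster's alias lets the interconnect replicate
// the write to all clusters selected by SNRT_BROADCAST_MASK.
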
inline void snrt_cluster_hw_barrier() {
    asm volatile("csrr x0, 0x7C2" ::: "memory");
}
static inline void snrt_inter_cluster_barrier() {
    // Everyone increments a shared counter
    uint32_t cnt =
        __atomic_add_fetch(&(_snrt_barrier.cnt), 1, __ATOMIC_RELAXED);
    // The last cluster resets the counter and wakes the others from WFI
    if (cnt == snrt_cluster_num()) {
        _snrt_barrier.cnt = 0;
        snrt_wake_all(1 << snrt_cluster_core_idx());
    } else {
        snrt_wfi();
        snrt_int_clr_mcip();  // Clear the wake-up interrupt
    }
}

inline void snrt_global_barrier() {
    // First synchronize the cores within each cluster
    snrt_cluster_hw_barrier();
    // Then synchronize the clusters through their DM cores
    if (snrt_is_dm_core()) {
        snrt_inter_cluster_barrier();
    }
    snrt_cluster_hw_barrier();
}
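
The leading hardware barrier guarantees that every core of a cluster has arrived before its DM core enters the inter-cluster barrier; the trailing one releases the local cores once the DM core returns. A sketch of typical use (produce() and consume() are assumptions of this example):

// Hypothetical two-phase multi-cluster kernel.
produce(snrt_cluster_idx());  // phase 1: per-cluster work
snrt_global_barrier();        // all cores of all clusters arrive here
consume();                    // phase 2: remote L1 results are now stable
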
inline void snrt_partial_barrier(snrt_barrier_t *barr, uint32_t n) {
    // Remember the current iteration and increment the barrier counter
    uint32_t prev_it = barr->iteration;
    uint32_t cnt = __atomic_add_fetch(&barr->cnt, 1, __ATOMIC_RELAXED);

    // The last of the n participants resets the counter and advances the
    // iteration; all others spin until it does
    if (cnt == n) {
        barr->cnt = 0;
        __atomic_add_fetch(&barr->iteration, 1, __ATOMIC_RELAXED);
    } else {
        while (prev_it == barr->iteration)
            ;
    }
}
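
Because it spins on the iteration counter instead of sleeping, the barrier works for any subset of cores, e.g. (dm_barr and sync_dm_cores() are illustrative):

// Hypothetical: synchronize only the DM cores across all clusters.
static snrt_barrier_t dm_barr;  // zero-initialized, in shared memory

void sync_dm_cores(void) {
    if (snrt_is_dm_core())
        snrt_partial_barrier(&dm_barr, snrt_cluster_num());
}
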
inline uint32_t snrt_global_all_to_all_reduction(uint32_t value) {
    // Reduce the values within each cluster first
    uint32_t *cluster_result = &(cls()->reduction);
    uint32_t tmp = __atomic_fetch_add(cluster_result, value, __ATOMIC_RELAXED);
    snrt_cluster_hw_barrier();

    // One core per cluster then accumulates the cluster result into the
    // global result and copies the final value back
    if (snrt_is_dm_core()) {
        __atomic_add_fetch(&_reduction_result, *cluster_result,
                           __ATOMIC_RELAXED);
        snrt_inter_cluster_barrier();
        *cluster_result = _reduction_result;
    }
    snrt_cluster_hw_barrier();
    return *cluster_result;
}
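
Since the function contains barriers, every core must call it. A usage sketch (the predicate is illustrative):

// Hypothetical: count the cores whose global index is even.
uint32_t flag = (snrt_global_core_idx() % 2 == 0) ? 1 : 0;
uint32_t total = snrt_global_all_to_all_reduction(flag);
// Every core now observes the same `total`.
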
inline void snrt_global_reduction_dma(double *dst_buffer, double *src_buffer,
                                      size_t len) {
    if (snrt_cluster_num() > 1) {
        // Iterate over the levels of the binary reduction tree
        int num_levels = ceil(log2(snrt_cluster_num()));
        for (unsigned int level = 0; level < num_levels; level++) {
            // An active cluster takes part in the current level; every
            // second active cluster is a sender
            uint32_t is_active = (snrt_cluster_idx() % (1 << level)) == 0;
            uint32_t is_sender =
                (snrt_cluster_idx() % (1 << (level + 1))) != 0;

            // Senders transfer their partial result to the destination
            // buffer of the respective receiver cluster
            if (is_active && is_sender) {
                if (!snrt_is_compute_core()) {
                    uint64_t dst = (uint64_t)dst_buffer -
                                   (1 << level) * SNRT_CLUSTER_OFFSET;
                    snrt_dma_start_1d(dst, (uint64_t)src_buffer,
                                      len * sizeof(double));
                    snrt_dma_wait_all();
                }
            }

            // Synchronize senders and receivers
            snrt_global_barrier();

            // Receivers accumulate the received data into their partial
            // result, parallelized over the compute cores
            if (is_active && !is_sender) {
                if (snrt_is_compute_core()) {
                    uint32_t items_per_core =
                        len / snrt_cluster_compute_core_num();
                    uint32_t core_offset =
                        snrt_cluster_core_idx() * items_per_core;
                    for (uint32_t i = 0; i < items_per_core; i++) {
                        uint32_t abs_i = core_offset + i;
                        src_buffer[abs_i] += dst_buffer[abs_i];
                    }
                }
            }
        }
    }
}
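
Note that len is implicitly assumed to be a multiple of the compute-core count, since items_per_core truncates. A worked trace for four clusters (illustrative):

// Hypothetical trace, 4 clusters:
//   level 0: active = {0,1,2,3}, senders = {1,3}
//            1 -> 0 and 3 -> 2 copy via DMA, then 0 and 2 accumulate
//   level 1: active = {0,2},     senders = {2}
//            2 -> 0 copies, then 0 accumulates
// After ceil(log2(4)) = 2 levels, cluster 0's src_buffer holds the sum.
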
inline void snrt_wait_writeback(uint32_t val) {
    asm volatile("mv %0, %0" : "+r"(val)::);
}
void snrt_partial_barrier(snrt_barrier_t *barr, uint32_t n)
Generic software barrier.
Definition sync.h:166
void snrt_enable_multicast(uint32_t mask)
Enable LSU multicast.
Definition sync.h:307
void snrt_mutex_ttas_acquire(volatile uint32_t *pmtx)
Acquire a mutex using a test-and-test-and-set (TTAS) strategy, blocking.
Definition sync.h:54
void snrt_mutex_acquire(volatile uint32_t *pmtx)
Acquire a mutex, blocking.
Definition sync.h:38
volatile uint32_t * snrt_mutex()
Get a pointer to a mutex variable.
Definition sync.h:28
void snrt_wait_writeback(uint32_t val)
Ensure value is written back to the register file.
Definition sync.h:292
uint32_t snrt_global_all_to_all_reduction(uint32_t value)
Perform a global sum reduction, blocking.
Definition sync.h:196
void snrt_global_barrier()
Synchronize all Snitch cores.
Definition sync.h:148
void snrt_cluster_hw_barrier()
Synchronize cores in a cluster with a hardware barrier, blocking.
Definition sync.h:110
void snrt_mutex_release(volatile uint32_t *pmtx)
Release a previously-acquired mutex.
Definition sync.h:71
void snrt_disable_multicast()
Disable LSU multicast.
Definition sync.h:312
void snrt_global_reduction_dma(double *dst_buffer, double *src_buffer, size_t len)
Perform a sum reduction among clusters, blocking.
Definition sync.h:232