 24    inline volatile uint32_t *snrt_mutex() { return &_snrt_mutex; }
38 " amoswap.w.aq t0,t0,(%0) # t0 = oldlock & lock = 1\n"
39 " bnez t0,1b # Retry if previously set)\n"
57 " amoswap.w.aq t0,t0,(%0) # t0 = oldlock & lock = 1\n"
58 " bnez t0,2b # Retry if previously set)\n"
 68        asm volatile(
               "amoswap.w.rl  x0,x0,(%0)   # Release lock by storing 0\n"
 82        asm volatile("csrr x0, 0x7C2" ::: "memory");
 93        uint32_t prev_barrier_iteration = _snrt_barrier.iteration;
 95        uint32_t cnt = __atomic_add_fetch(&(_snrt_barrier.cnt), 1, __ATOMIC_RELAXED);
 98        if (cnt == snrt_cluster_num()) {
 99            _snrt_barrier.cnt = 0;
100            __atomic_add_fetch(&(_snrt_barrier.iteration), 1, __ATOMIC_RELAXED);
101        } else {
102            while (prev_barrier_iteration == _snrt_barrier.iteration)
                   ;
120        if (snrt_is_dm_core()) {
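In snrt_global_barrier() only the DM core of each cluster enters the software inter-cluster barrier (the check at line 120); the cluster hardware barrier keeps the remaining cores in step around it. From application code the call is simply a full rendezvous of all Snitch cores, as in this sketch with made-up phase functions:

    // Placeholders for application kernels.
    static void produce_tile(void) { /* ... write this core's share ... */ }
    static void consume_tile(void) { /* ... read the phase-1 results ... */ }

    void pipeline_step(void) {
        produce_tile();         // phase 1: every core writes its share
        snrt_global_barrier();  // all cores, in all clusters, wait here
        consume_tile();         // phase 2: phase-1 results are now visible
    }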
136        uint32_t prev_it = barr->iteration;
137        uint32_t cnt = __atomic_add_fetch(&barr->cnt, 1, __ATOMIC_RELAXED);
142            __atomic_add_fetch(&barr->iteration, 1, __ATOMIC_RELAXED);
145            while (prev_it == barr->iteration)
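snrt_partial_barrier() reuses the counter-and-iteration scheme of the inter-cluster barrier, but against a caller-provided snrt_barrier_t and an explicit participant count n, so any subset of cores can synchronize. A sketch, assuming the barrier object is zero-initialized and placed in memory visible to every participant:

    // Hypothetical barrier joining the compute cores of all clusters,
    // leaving the DM cores free to run ahead.
    static snrt_barrier_t compute_barrier;

    void compute_core_sync(void) {
        if (snrt_is_compute_core()) {
            snrt_partial_barrier(&compute_barrier,
                                 snrt_cluster_compute_core_num() *
                                     snrt_cluster_num());
        }
    }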
165        __atomic_add_fetch(&_reduction_result, value, __ATOMIC_RELAXED);
167        return _reduction_result;
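Each caller adds its contribution to the shared _reduction_result and, once the cores have synchronized, reads back the same total. A simple use is counting the cores that satisfy a per-core predicate; every core is expected to participate in the call:

    // Returns, on every core, the number of cores for which 'flag' was nonzero.
    uint32_t count_flagged_cores(int flag) {
        return snrt_global_all_to_all_reduction(flag ? 1u : 0u);
    }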
188        if (snrt_cluster_num() == 1) {
189            if (!snrt_is_compute_core()) {
190                snrt_dma_start_1d(dst_buffer, src_buffer, len * sizeof(double));
196            int num_levels = ceil(log2(snrt_cluster_num()));
197            for (unsigned int level = 0; level < num_levels; level++) {
202                uint32_t is_active = (snrt_cluster_idx() % (1 << level)) == 0;
203                uint32_t is_sender = (snrt_cluster_idx() % (1 << (level + 1))) != 0;
207                if (is_active && is_sender) {
208                    if (!snrt_is_compute_core()) {
210                        void *dst = (void *)dst_buffer - (1 << level) * SNRT_CLUSTER_OFFSET;
211                        snrt_dma_start_1d(dst, src_buffer, len * sizeof(double));
220                if (is_active && !is_sender) {
222                    if (snrt_is_compute_core()) {
223                        uint32_t items_per_core = len / snrt_cluster_compute_core_num();
225                        uint32_t core_offset = snrt_cluster_core_idx() * items_per_core;
227                        for (uint32_t i = 0; i < items_per_core; i++) {
228                            uint32_t abs_i = core_offset + i;
229                            dst_buffer[abs_i] += src_buffer[abs_i];
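At each level of the reduction tree, sender clusters DMA their source buffer into the destination buffer of a receiver cluster located (1 << level) * SNRT_CLUSTER_OFFSET bytes below them in the address map, and the receivers' compute cores then add their own source buffer element-wise into that destination. A hedged usage sketch; the buffer names are illustrative, and the buffers are assumed to sit at the same local offset in every cluster's memory:

    // 'partial' holds this cluster's per-element partial sums; after the
    // call, the root of the reduction tree (cluster 0, which is never a
    // sender) is expected to hold the summed values in 'result'.
    void reduce_partials(double *result, double *partial, size_t len) {
        snrt_global_reduction_dma(result, partial, len);
    }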
Function index:

volatile uint32_t *snrt_mutex()
    Get a pointer to a mutex variable. Definition: sync.h:24
void snrt_mutex_acquire(volatile uint32_t *pmtx)
    Acquire a mutex, blocking. Definition: sync.h:34
void snrt_mutex_ttas_acquire(volatile uint32_t *pmtx)
    Acquire a mutex, blocking. Definition: sync.h:50
void snrt_mutex_release(volatile uint32_t *pmtx)
    Release a previously-acquired mutex. Definition: sync.h:67
void snrt_cluster_hw_barrier()
    Synchronize cores in a cluster with a hardware barrier, blocking. Definition: sync.h:81
void snrt_inter_cluster_barrier()
    Synchronize one core from every cluster with the others. Definition: sync.h:91
void snrt_global_barrier()
    Synchronize all Snitch cores. Definition: sync.h:116
void snrt_partial_barrier(snrt_barrier_t *barr, uint32_t n)
    Generic software barrier. Definition: sync.h:134
uint32_t snrt_global_all_to_all_reduction(uint32_t value)
    Perform a global sum reduction, blocking. Definition: sync.h:164
void snrt_global_reduction_dma(double *dst_buffer, double *src_buffer, size_t len)
    Perform a sum reduction among clusters, blocking. Definition: sync.h:185