snRuntime/src/ssr.h
Types
Name | |
---|---|
enum | snrt_ssr_dm { SNRT_SSR_DM0 = 0, SNRT_SSR_DM1 = 1, SNRT_SSR_DM2 = 2, SNRT_SSR_DM_ALL = 31} The different SSR data movers. |
enum | snrt_ssr_dim { SNRT_SSR_1D = 0, SNRT_SSR_2D = 1, SNRT_SSR_3D = 2, SNRT_SSR_4D = 3} The different dimensions. |
enum | @1 { REG_STATUS = 0, REG_REPEAT = 1, REG_BOUNDS = 2, REG_STRIDES = 6, REG_RPTR = 24, REG_WPTR = 28} The SSR configuration registers. |
Functions
Name | |
---|---|
void | snrt_fpu_fence() Synchronize the integer and float pipelines. |
void | snrt_ssr_enable() Enable SSR. |
void | snrt_ssr_disable() Disable SSR. |
uint32_t | read_ssr_cfg(uint32_t reg, uint32_t dm) |
void | write_ssr_cfg(uint32_t reg, uint32_t dm, uint32_t value) |
void | snrt_ssr_loop_1d(enum snrt_ssr_dm dm, size_t b0, size_t s0) |
void | snrt_ssr_loop_2d(enum snrt_ssr_dm dm, size_t b0, size_t b1, size_t s0, size_t s1) |
void | snrt_ssr_loop_3d(enum snrt_ssr_dm dm, size_t b0, size_t b1, size_t b2, size_t s0, size_t s1, size_t s2) |
void | snrt_ssr_loop_4d(enum snrt_ssr_dm dm, size_t b0, size_t b1, size_t b2, size_t b3, size_t s0, size_t s1, size_t s2, size_t s3) |
void | snrt_ssr_repeat(enum snrt_ssr_dm dm, size_t count) Configure the repetition count for a stream. |
void | snrt_ssr_read(enum snrt_ssr_dm dm, enum snrt_ssr_dim dim, volatile void * ptr) Start a streaming read. |
void | snrt_ssr_write(enum snrt_ssr_dm dm, enum snrt_ssr_dim dim, volatile void * ptr) Start a streaming write. |
Types Documentation
enum snrt_ssr_dm
Enumerator | Value | Description |
---|---|---|
SNRT_SSR_DM0 | 0 | |
SNRT_SSR_DM1 | 1 | |
SNRT_SSR_DM2 | 2 | |
SNRT_SSR_DM_ALL | 31 | |
The different SSR data movers.
enum snrt_ssr_dim
Enumerator | Value | Description |
---|---|---|
SNRT_SSR_1D | 0 | |
SNRT_SSR_2D | 1 | |
SNRT_SSR_3D | 2 | |
SNRT_SSR_4D | 3 | |
The different dimensions.
enum @1
Enumerator | Value | Description |
---|---|---|
REG_STATUS | 0 | |
REG_REPEAT | 1 | |
REG_BOUNDS | 2 | |
REG_STRIDES | 6 | |
REG_RPTR | 24 | |
REG_WPTR | 28 | |
The SSR configuration registers.
Functions Documentation
function snrt_fpu_fence
inline void snrt_fpu_fence()
Synchronize the integer and float pipelines.
function snrt_ssr_enable
inline void snrt_ssr_enable()
Enable SSR.
function snrt_ssr_disable
inline void snrt_ssr_disable()
Disable SSR.
function read_ssr_cfg
inline uint32_t read_ssr_cfg(
uint32_t reg,
uint32_t dm
)
function write_ssr_cfg
inline void write_ssr_cfg(
uint32_t reg,
uint32_t dm,
uint32_t value
)
function snrt_ssr_loop_1d
inline void snrt_ssr_loop_1d(
enum snrt_ssr_dm dm,
size_t b0,
size_t s0
)
function snrt_ssr_loop_2d
inline void snrt_ssr_loop_2d(
enum snrt_ssr_dm dm,
size_t b0,
size_t b1,
size_t s0,
size_t s1
)
function snrt_ssr_loop_3d
inline void snrt_ssr_loop_3d(
enum snrt_ssr_dm dm,
size_t b0,
size_t b1,
size_t b2,
size_t s0,
size_t s1,
size_t s2
)
function snrt_ssr_loop_4d
inline void snrt_ssr_loop_4d(
enum snrt_ssr_dm dm,
size_t b0,
size_t b1,
size_t b2,
size_t b3,
size_t s0,
size_t s1,
size_t s2,
size_t s3
)
function snrt_ssr_repeat
inline void snrt_ssr_repeat(
enum snrt_ssr_dm dm,
size_t count
)
Configure the repetition count for a stream.
function snrt_ssr_read
inline void snrt_ssr_read(
enum snrt_ssr_dm dm,
enum snrt_ssr_dim dim,
volatile void * ptr
)
Start a streaming read.
function snrt_ssr_write
inline void snrt_ssr_write(
enum snrt_ssr_dm dm,
enum snrt_ssr_dim dim,
volatile void * ptr
)
Start a streaming write.
Source code
// Copyright 2023 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
// Synchronize the integer and float pipelines.
//
// The asm copies floating-point register fa0 into an integer scratch register
// (`fmv.x.w`) and then reads that register again with `mv`. The "memory"
// clobber additionally keeps the compiler from moving memory accesses across
// this statement.
// NOTE(review): presumably the integer core has to wait for the FP result
// before `mv` can execute, which is what creates the fence -- confirm against
// the core documentation.
inline void snrt_fpu_fence() {
// Scratch register only; the value is never used by C code.
// NOTE(review): the operand is read-write ("+r") although `tmp` is never
// initialized; the asm writes it before reading, so "=r" looks sufficient --
// confirm before changing.
unsigned tmp;
asm volatile(
"fmv.x.w %0, fa0\n"
"mv %0, %0\n"
: "+r"(tmp)::"memory");
}
// The different SSR data movers. A value of this type is passed as the `dm`
// argument of the SSR configuration helpers in this file.
enum snrt_ssr_dm {
SNRT_SSR_DM0 = 0,
SNRT_SSR_DM1 = 1,
SNRT_SSR_DM2 = 2,
// Index 31 addresses all SSRs at once when writing a configuration value.
SNRT_SSR_DM_ALL = 31,
};
// The different stream dimensionalities. Each value equals the number of
// dimensions minus one; snrt_ssr_read() / snrt_ssr_write() add it to
// REG_RPTR / REG_WPTR to select the pointer register that starts the stream.
enum snrt_ssr_dim {
SNRT_SSR_1D = 0,
SNRT_SSR_2D = 1,
SNRT_SSR_3D = 2,
SNRT_SSR_4D = 3,
};
// The SSR configuration registers. read_ssr_cfg() / write_ssr_cfg() combine a
// register index with a data mover index as `dm | reg << 5` to form the
// configuration address.
enum {
REG_STATUS = 0,
REG_REPEAT = 1,
REG_BOUNDS = 2, // + loop index (the loop helpers use 0..3)
REG_STRIDES = 6, // + loop index (the loop helpers use 0..3)
REG_RPTR = 24, // + snrt_ssr_dim, written to start a streaming read
REG_WPTR = 28, // + snrt_ssr_dim, written to start a streaming write
};
// Enable SSR.
//
// Without the LLVM toolchain, bit 0 of CSR 0x7C0 is set directly; with it,
// the dedicated compiler builtin is used instead.
inline void snrt_ssr_enable() {
#ifndef __TOOLCHAIN_LLVM__
    asm volatile("csrsi 0x7C0, 1\n");
#else
    __builtin_ssr_enable();
#endif
}
// Disable SSR.
//
// Without the LLVM toolchain, bit 0 of CSR 0x7C0 is cleared directly; with
// it, the dedicated compiler builtin is used instead.
inline void snrt_ssr_disable() {
#ifndef __TOOLCHAIN_LLVM__
    asm volatile("csrci 0x7C0, 1\n");
#else
    __builtin_ssr_disable();
#endif
}
// Read one SSR configuration register.
//
// reg: register index (one of the REG_* constants, plus any offset)
// dm:  data mover index
// Returns the value produced by the `scfgri` instruction for the immediate
// address `dm | reg << 5`.
//
// Both `reg` and `dm` use the "i" (immediate) constraint, so they must be
// compile-time constants at the point where the asm is emitted; in practice
// this means the call has to be inlined with constant arguments.
inline uint32_t read_ssr_cfg(uint32_t reg, uint32_t dm) {
uint32_t value;
asm volatile("scfgri %[value], %[dm] | %[reg]<<5\n"
: [ value ] "=r"(value)
: [ dm ] "i"(dm), [ reg ] "i"(reg));
return value;
}
// Write one SSR configuration register.
//
// reg:   register index (one of the REG_* constants, plus any offset)
// dm:    data mover index
// value: value handed to the `scfgwi` instruction for the immediate address
//        `dm | reg << 5`
//
// Both `reg` and `dm` use the "i" (immediate) constraint, so they must be
// compile-time constants at the point where the asm is emitted; in practice
// this means the call has to be inlined with constant arguments.
inline void write_ssr_cfg(uint32_t reg, uint32_t dm, uint32_t value) {
asm volatile("scfgwi %[value], %[dm] | %[reg]<<5\n" ::[value] "r"(value),
[ dm ] "i"(dm), [ reg ] "i"(reg));
}
// Configure an SSR data mover for a 1D loop nest.
//
// dm: data mover to configure
// b0: bound (limit) of the loop; the bound register receives b0 - 1
// s0: increment size of the loop; written to the stride register as-is
inline void snrt_ssr_loop_1d(enum snrt_ssr_dm dm, size_t b0, size_t s0) {
    const size_t last0 = b0 - 1;

    write_ssr_cfg(REG_BOUNDS + 0, dm, last0);
    // With a single loop there is no inner loop offset to subtract.
    write_ssr_cfg(REG_STRIDES + 0, dm, s0);
}
// Configure an SSR data mover for a 2D loop nest.
//
// dm:     data mover to configure
// b0, b1: loop bounds (limits); each bound register receives its bound - 1
// s0, s1: loop increment sizes, paired with b0 and b1 respectively
//
// The stride register of loop 1 is written as s1 minus s0 * (b0 - 1), i.e.
// minus the total increment applied by loop 0.
inline void snrt_ssr_loop_2d(enum snrt_ssr_dm dm, size_t b0, size_t b1,
                             size_t s0, size_t s1) {
    const size_t last0 = b0 - 1;
    const size_t last1 = b1 - 1;

    write_ssr_cfg(REG_BOUNDS + 0, dm, last0);
    write_ssr_cfg(REG_BOUNDS + 1, dm, last1);

    const size_t span0 = s0 * last0;

    write_ssr_cfg(REG_STRIDES + 0, dm, s0);
    write_ssr_cfg(REG_STRIDES + 1, dm, s1 - span0);
}
// Configure an SSR data mover for a 3D loop nest.
//
// dm:         data mover to configure
// b0, b1, b2: loop bounds (limits); each bound register receives its
//             bound - 1
// s0, s1, s2: loop increment sizes, paired with b0, b1 and b2 respectively
//
// The stride register of loop k is written as s_k minus the sum of
// s_i * (b_i - 1) over all lower-numbered loops i < k.
inline void snrt_ssr_loop_3d(enum snrt_ssr_dm dm, size_t b0, size_t b1,
                             size_t b2, size_t s0, size_t s1, size_t s2) {
    const size_t last0 = b0 - 1;
    const size_t last1 = b1 - 1;
    const size_t last2 = b2 - 1;

    write_ssr_cfg(REG_BOUNDS + 0, dm, last0);
    write_ssr_cfg(REG_BOUNDS + 1, dm, last1);
    write_ssr_cfg(REG_BOUNDS + 2, dm, last2);

    // Running totals of the increments applied by the lower-numbered loops.
    const size_t span0 = s0 * last0;
    const size_t span1 = span0 + s1 * last1;

    write_ssr_cfg(REG_STRIDES + 0, dm, s0);
    write_ssr_cfg(REG_STRIDES + 1, dm, s1 - span0);
    write_ssr_cfg(REG_STRIDES + 2, dm, s2 - span1);
}
// Configure an SSR data mover for a 4D loop nest.
//
// dm:     data mover to configure
// b0..b3: loop bounds (limits), b0 being the inner-most and b3 the
//         outer-most; each bound register receives its bound - 1
// s0..s3: loop increment sizes, s0 belonging to the inner-most loop
//
// The stride register of loop k is written as s_k minus the sum of
// s_i * (b_i - 1) over all inner loops i < k.
inline void snrt_ssr_loop_4d(enum snrt_ssr_dm dm, size_t b0, size_t b1,
                             size_t b2, size_t b3, size_t s0, size_t s1,
                             size_t s2, size_t s3) {
    const size_t last0 = b0 - 1;
    const size_t last1 = b1 - 1;
    const size_t last2 = b2 - 1;
    const size_t last3 = b3 - 1;

    write_ssr_cfg(REG_BOUNDS + 0, dm, last0);
    write_ssr_cfg(REG_BOUNDS + 1, dm, last1);
    write_ssr_cfg(REG_BOUNDS + 2, dm, last2);
    write_ssr_cfg(REG_BOUNDS + 3, dm, last3);

    // Running totals of the increments applied by the inner loops.
    const size_t span0 = s0 * last0;
    const size_t span1 = span0 + s1 * last1;
    const size_t span2 = span1 + s2 * last2;

    write_ssr_cfg(REG_STRIDES + 0, dm, s0);
    write_ssr_cfg(REG_STRIDES + 1, dm, s1 - span0);
    write_ssr_cfg(REG_STRIDES + 2, dm, s2 - span1);
    write_ssr_cfg(REG_STRIDES + 3, dm, s3 - span2);
}
// Configure the repetition count for a stream.
//
// dm:    data mover to configure
// count: repetition count; the repeat register receives count - 1
inline void snrt_ssr_repeat(enum snrt_ssr_dm dm, size_t count) {
    const size_t encoded = count - 1;
    write_ssr_cfg(REG_REPEAT, dm, encoded);
}
// Start a streaming read.
//
// dm:  data mover to use
// dim: dimensionality of the stream; selects the read pointer register
//      REG_RPTR + dim
// ptr: address written to that pointer register
inline void snrt_ssr_read(enum snrt_ssr_dm dm, enum snrt_ssr_dim dim,
                          volatile void *ptr) {
    const uintptr_t addr = (uintptr_t)ptr;
    write_ssr_cfg(REG_RPTR + dim, dm, addr);
}
// Start a streaming write.
//
// dm:  data mover to use
// dim: dimensionality of the stream; selects the write pointer register
//      REG_WPTR + dim
// ptr: address written to that pointer register
inline void snrt_ssr_write(enum snrt_ssr_dm dm, enum snrt_ssr_dim dim,
                           volatile void *ptr) {
    const uintptr_t addr = (uintptr_t)ptr;
    write_ssr_cfg(REG_WPTR + dim, dm, addr);
}
Updated on 2023-06-19 at 09:43:56 +0000