Snitch Runtime
Loading...
Searching...
No Matches
ssr.h File Reference

This file contains functions to conveniently program Snitch's SSRs. More...

Go to the source code of this file.

Enumerations

enum  snrt_ssr_dm_t { SNRT_SSR_DM0 = 0 , SNRT_SSR_DM1 = 1 , SNRT_SSR_DM2 = 2 , SNRT_SSR_DM_ALL = 31 }
 The different SSRs. More...
 
enum  snrt_ssr_dim_t { SNRT_SSR_1D = 0 , SNRT_SSR_2D = 1 , SNRT_SSR_3D = 2 , SNRT_SSR_4D = 3 }
 The different dimensions. More...
 
enum  snrt_ssr_reg_t {
  SNRT_SSR_REG_STATUS = 0 , SNRT_SSR_REG_REPEAT = 1 , SNRT_SSR_REG_BOUNDS = 2 , SNRT_SSR_REG_STRIDES = 6 ,
  SNRT_SSR_REG_IDX_CFG = 10 , SNRT_SSR_REG_IDX_BASE = 11 , SNRT_SSR_REG_RPTR_INDIR = 16 , SNRT_SSR_REG_RPTR = 24 ,
  SNRT_SSR_REG_WPTR = 28
}
 The SSR configuration registers. More...
 
enum  snrt_ssr_idxsize_t { SNRT_SSR_IDXSIZE_U8 = 0 , SNRT_SSR_IDXSIZE_U16 = 1 , SNRT_SSR_IDXSIZE_U32 = 2 , SNRT_SSR_IDXSIZE_U64 = 3 }
 The size of the SSSR indirection indices. More...
 

Functions

void snrt_fpu_fence ()
 Synchronize the integer and float pipelines.
 
void snrt_ssr_enable ()
 Enable all SSRs.
 
void snrt_ssr_disable ()
 Disable all SSRs.
 
void snrt_sc_enable (uint32_t mask)
 Enable scalar chaining.
 
void snrt_sc_disable (uint32_t mask)
 Disable scalar chaining.
 
static uint32_t read_ssr_cfg (const snrt_ssr_reg_t reg, const snrt_ssr_dm_t dm)
 Read the value of an SSR configuration register.
 
static void write_ssr_cfg (const snrt_ssr_reg_t reg, const snrt_ssr_dm_t dm, uint32_t value)
 Write a value to an SSR configuration register.
 
static void snrt_ssr_loop_1d (const snrt_ssr_dm_t dm, size_t b0, size_t s0)
 Configure an SSR data mover for a 1D loop nest.
 
static void snrt_ssr_loop_2d (const snrt_ssr_dm_t dm, size_t b0, size_t b1, size_t s0, size_t s1)
 Configure an SSR data mover for a 2D loop nest.
 
static void snrt_ssr_loop_3d (const snrt_ssr_dm_t dm, size_t b0, size_t b1, size_t b2, size_t s0, size_t s1, size_t s2)
 Configure an SSR data mover for a 3D loop nest.
 
static void snrt_ssr_loop_4d (const snrt_ssr_dm_t dm, size_t b0, size_t b1, size_t b2, size_t b3, size_t s0, size_t s1, size_t s2, size_t s3)
 Configure an SSR data mover for a 4D loop nest.
 
static void snrt_ssr_repeat (const snrt_ssr_dm_t dm, size_t count)
 Configure the repetition count for a stream.
 
static void snrt_ssr_read (const snrt_ssr_dm_t dm, const snrt_ssr_dim_t dim, volatile void *ptr)
 Start a streaming read.
 
static void snrt_ssr_write (const snrt_ssr_dm_t dm, const snrt_ssr_dim_t dim, volatile void *ptr)
 Start a streaming write.
 
static void snrt_issr_set_idx_cfg (const snrt_ssr_dm_t dm, snrt_ssr_idxsize_t idxsize)
 Write the index size to the SSSR index configuration register (first step of starting a streaming indirect read).
 
static void snrt_issr_set_bound (const snrt_ssr_dm_t dm, size_t bound)
 
static void snrt_issr_set_ptrs (const snrt_ssr_dm_t dm, volatile void *base, volatile void *idcs)
 
static void snrt_issr_read (const snrt_ssr_dm_t dm, volatile void *base, volatile void *idcs, size_t bound, snrt_ssr_idxsize_t idxsize)
 

Detailed Description

This file contains functions to conveniently program Snitch's SSRs.

An SSR stream can be configured to replace a store (or load) sequence of the kind generated by an N-dimensional affine loop nest. For example, the following store sequence can be mapped to a 2D write SSR:

for (int i = 0; i < b1; i++)
    for (int j = 0; j < b0; j++)
        ptr[i * s1 + j * s0] = 0;

An ISSR (Indirect SSR) stream can be configured to replace a store (or load) sequence as could be generated by:

for (int i = 0; i < bound; i++)
    base[idcs[i]] = 0;

The convenience functions provided in this file can be used to set up such access patterns. The function argument names reflect the variable names presented in these sample code snippets.

Note: The exact number of elements configured in an (I)SSR stream must be consumed. Failure to comply with this requirement will result in undefined behaviour.

Enumeration Type Documentation

◆ snrt_ssr_dim_t

The different dimensions.

Enumerator
SNRT_SSR_1D 

1D stream

SNRT_SSR_2D 

2D stream

SNRT_SSR_3D 

3D stream

SNRT_SSR_4D 

4D stream

60 {
61 SNRT_SSR_1D = 0,
62 SNRT_SSR_2D = 1,
63 SNRT_SSR_3D = 2,
64 SNRT_SSR_4D = 3
snrt_ssr_dim_t
The different dimensions.
Definition ssr.h:60
@ SNRT_SSR_2D
Definition ssr.h:62
@ SNRT_SSR_1D
Definition ssr.h:61
@ SNRT_SSR_3D
Definition ssr.h:63
@ SNRT_SSR_4D
Definition ssr.h:64

◆ snrt_ssr_dm_t

The different SSRs.

Enumerator
SNRT_SSR_DM0 

SSR data mover 0

SNRT_SSR_DM1 

SSR data mover 1

SNRT_SSR_DM2 

SSR data mover 2

SNRT_SSR_DM_ALL 

Write to all SSRs

50 {
51 SNRT_SSR_DM0 = 0,
52 SNRT_SSR_DM1 = 1,
53 SNRT_SSR_DM2 = 2,
54 SNRT_SSR_DM_ALL = 31
snrt_ssr_dm_t
The different SSRs.
Definition ssr.h:50
@ SNRT_SSR_DM0
Definition ssr.h:51
@ SNRT_SSR_DM_ALL
Definition ssr.h:54
@ SNRT_SSR_DM1
Definition ssr.h:52
@ SNRT_SSR_DM2
Definition ssr.h:53

◆ snrt_ssr_idxsize_t

The size of the SSSR indirection indices.

Enumerator
SNRT_SSR_IDXSIZE_U8 

Unsigned 8-bit integer

SNRT_SSR_IDXSIZE_U16 

Unsigned 16-bit integer

SNRT_SSR_IDXSIZE_U32 

Unsigned 32-bit integer

SNRT_SSR_IDXSIZE_U64 

Unsigned 64-bit integer

85 {
snrt_ssr_idxsize_t
The size of the SSSR indirection indices.
Definition ssr.h:85
@ SNRT_SSR_IDXSIZE_U8
Definition ssr.h:86
@ SNRT_SSR_IDXSIZE_U16
Definition ssr.h:87
@ SNRT_SSR_IDXSIZE_U32
Definition ssr.h:88
@ SNRT_SSR_IDXSIZE_U64
Definition ssr.h:89

◆ snrt_ssr_reg_t

The SSR configuration registers.

Enumerator
SNRT_SSR_REG_STATUS 

SSR status register

SNRT_SSR_REG_REPEAT 

SSR repeat register

SNRT_SSR_REG_BOUNDS 

SSR bounds register

SNRT_SSR_REG_STRIDES 

SSR strides register

SNRT_SSR_REG_IDX_CFG 

SSSR index configuration register

SNRT_SSR_REG_IDX_BASE 

SSSR base address register

SNRT_SSR_REG_RPTR_INDIR 

SSSR indirection indices read pointer register

SNRT_SSR_REG_RPTR 

SSR read pointer register

SNRT_SSR_REG_WPTR 

SSR write pointer register

70 {
snrt_ssr_reg_t
The SSR configuration registers.
Definition ssr.h:70
@ SNRT_SSR_REG_IDX_CFG
Definition ssr.h:75
@ SNRT_SSR_REG_RPTR
Definition ssr.h:78
@ SNRT_SSR_REG_WPTR
Definition ssr.h:79
@ SNRT_SSR_REG_BOUNDS
Definition ssr.h:73
@ SNRT_SSR_REG_RPTR_INDIR
Definition ssr.h:77
@ SNRT_SSR_REG_REPEAT
Definition ssr.h:72
@ SNRT_SSR_REG_STATUS
Definition ssr.h:71
@ SNRT_SSR_REG_STRIDES
Definition ssr.h:74
@ SNRT_SSR_REG_IDX_BASE
Definition ssr.h:76

Function Documentation

◆ read_ssr_cfg()

static uint32_t read_ssr_cfg ( const snrt_ssr_reg_t reg,
const snrt_ssr_dm_t dm )
inline static

Read the value of an SSR configuration register.

Parameters
reg: The register index.
dm: The SSR index.
Returns
The value of the register.
137 {
138 uint32_t value;
139 asm volatile("scfgri %[value], %[dm] | %[reg]<<5\n"
140 : [ value ] "=r"(value)
141 : [ dm ] "i"(dm), [ reg ] "i"(reg));
142 return value;
143}

◆ snrt_fpu_fence()

void snrt_fpu_fence ( )
inline

Synchronize the integer and float pipelines.

39 {
40 unsigned tmp;
41 asm volatile(
42 "fmv.x.w %0, fa0\n"
43 "mv %0, %0\n"
44 : "+r"(tmp)::"memory");
45}

◆ snrt_issr_read()

static void snrt_issr_read ( const snrt_ssr_dm_t dm,
volatile void * base,
volatile void * idcs,
size_t bound,
snrt_ssr_idxsize_t idxsize )
inline static
320 {
321 snrt_issr_set_idx_cfg(dm, idxsize);
322 snrt_issr_set_bound(dm, bound);
323 snrt_issr_set_ptrs(dm, base, idcs);
324}
static void snrt_issr_set_idx_cfg(const snrt_ssr_dm_t dm, snrt_ssr_idxsize_t idxsize)
Write the index size to the SSSR index configuration register (first step of starting a streaming indirect read).
Definition ssr.h:302

◆ snrt_issr_set_bound()

static void snrt_issr_set_bound ( const snrt_ssr_dm_t dm,
size_t bound )
inline static
307 {
309}
static void write_ssr_cfg(const snrt_ssr_reg_t reg, const snrt_ssr_dm_t dm, uint32_t value)
Write a value to an SSR configuration register.
Definition ssr.h:156

◆ snrt_issr_set_idx_cfg()

static void snrt_issr_set_idx_cfg ( const snrt_ssr_dm_t dm,
snrt_ssr_idxsize_t idxsize )
inline static

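Write the index size to the SSSR index configuration register. This is the first step of starting a streaming indirect read; see snrt_issr_read().

Parameters
dm: The SSSR index.
idxsize: The size of the indices.
Note: The upstream comment additionally lists base (the base pointer to the data), ptr (the pointer to the indirection indices) and bound (the bound of the first and only loop). These are not parameters of this function; they describe the arguments of snrt_issr_read(), where the pointer to the indices is named idcs.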
303 {
304 write_ssr_cfg(SNRT_SSR_REG_IDX_CFG, dm, (idxsize & 0xFF));
305}

◆ snrt_issr_set_ptrs()

static void snrt_issr_set_ptrs ( const snrt_ssr_dm_t dm,
volatile void * base,
volatile void * idcs )
inline static
313 {
314 write_ssr_cfg(SNRT_SSR_REG_IDX_BASE, dm, (uintptr_t)base);
315 write_ssr_cfg(SNRT_SSR_REG_RPTR_INDIR, dm, (uintptr_t)idcs);
316}

◆ snrt_sc_disable()

void snrt_sc_disable ( uint32_t mask)
inline

Disable scalar chaining.

126 {
127 asm volatile("csrc 0x7C3, %[mask]\n" : : [ mask ] "r"(mask) :);
128}

◆ snrt_sc_enable()

void snrt_sc_enable ( uint32_t mask)
inline

Enable scalar chaining.

Parameters
mask: A bitmask indicating which registers should be enabled for chaining.
119 {
120 asm volatile("csrs 0x7C3, %[mask]\n" : : [ mask ] "r"(mask) :);
121}

◆ snrt_ssr_disable()

void snrt_ssr_disable ( )
inline

Disable all SSRs.

106 {
107#ifdef __TOOLCHAIN_LLVM__
108 __builtin_ssr_disable();
109#else
110 asm volatile("csrci 0x7C0, 1\n");
111#endif
112}

◆ snrt_ssr_enable()

void snrt_ssr_enable ( )
inline

Enable all SSRs.

95 {
96#ifdef __TOOLCHAIN_LLVM__
97 __builtin_ssr_enable();
98#else
99 asm volatile("csrsi 0x7C0, 1\n");
100#endif
101}

◆ snrt_ssr_loop_1d()

static void snrt_ssr_loop_1d ( const snrt_ssr_dm_t dm,
size_t b0,
size_t s0 )
inline static

Configure an SSR data mover for a 1D loop nest.

Parameters
dm: The SSR index.
b0: The bound of the loop.
s0: The stride of the loop.
169 {
170 --b0;
172 size_t a = 0;
174 a += s0 * b0;
175}

◆ snrt_ssr_loop_2d()

static void snrt_ssr_loop_2d ( const snrt_ssr_dm_t dm,
size_t b0,
size_t b1,
size_t s0,
size_t s1 )
inline static

Configure an SSR data mover for a 2D loop nest.

Parameters
dm: The SSR index.
b0: The bound of the first loop.
b1: The bound of the second loop.
s0: The stride of the first loop.
s1: The stride of the second loop.
186 {
187 --b0;
188 --b1;
191 size_t a = 0;
193 a += s0 * b0;
195 a += s1 * b1;
196}

◆ snrt_ssr_loop_3d()

static void snrt_ssr_loop_3d ( const snrt_ssr_dm_t dm,
size_t b0,
size_t b1,
size_t b2,
size_t s0,
size_t s1,
size_t s2 )
inline static

Configure an SSR data mover for a 3D loop nest.

Parameters
dm: The SSR index.
b0: The bound of the first loop.
b1: The bound of the second loop.
b2: The bound of the third loop.
s0: The stride of the first loop.
s1: The stride of the second loop.
s2: The stride of the third loop.
210 {
211 --b0;
212 --b1;
213 --b2;
217 size_t a = 0;
219 a += s0 * b0;
221 a += s1 * b1;
223 a += s2 * b2;
224}

◆ snrt_ssr_loop_4d()

static void snrt_ssr_loop_4d ( const snrt_ssr_dm_t dm,
size_t b0,
size_t b1,
size_t b2,
size_t b3,
size_t s0,
size_t s1,
size_t s2,
size_t s3 )
inline static

Configure an SSR data mover for a 4D loop nest.

Parameters
dm: The SSR index.
b0: The bound of the first loop.
b1: The bound of the second loop.
b2: The bound of the third loop.
b3: The bound of the fourth loop.
s0: The stride of the first loop.
s1: The stride of the second loop.
s2: The stride of the third loop.
s3: The stride of the fourth loop.
240 {
241 --b0;
242 --b1;
243 --b2;
244 --b3;
249 size_t a = 0;
251 a += s0 * b0;
253 a += s1 * b1;
255 a += s2 * b2;
257 a += s3 * b3;
258}

◆ snrt_ssr_read()

static void snrt_ssr_read ( const snrt_ssr_dm_t dm,
const snrt_ssr_dim_t dim,
volatile void * ptr )
inline static

Start a streaming read.

Parameters
dm: The SSR index.
dim: The number of dimensions to use.
ptr: The pointer to the data.
276 {
278 (uintptr_t)ptr);
279}

◆ snrt_ssr_repeat()

static void snrt_ssr_repeat ( const snrt_ssr_dm_t dm,
size_t count )
inline static

Configure the repetition count for a stream.

Parameters
dm: The SSR index.
count: The repetition count.
265 {
266 write_ssr_cfg(SNRT_SSR_REG_REPEAT, dm, count - 1);
267}

◆ snrt_ssr_write()

static void snrt_ssr_write ( const snrt_ssr_dm_t dm,
const snrt_ssr_dim_t dim,
volatile void * ptr )
inline static

Start a streaming write.

Parameters
dm: The SSR index.
dim: The number of dimensions to use.
ptr: The pointer to the data.
289 {
291 (uintptr_t)ptr);
292}

◆ write_ssr_cfg()

static void write_ssr_cfg ( const snrt_ssr_reg_t reg,
const snrt_ssr_dm_t dm,
uint32_t value )
inline static

Write a value to an SSR configuration register.

Parameters
reg: The register index.
dm: The SSR index.
value: The value to write.
Note
The function passes the reg and dm arguments as immediates, so both must be known at compile time. Consequently, the function must have internal linkage (static keyword) and must always be inlined. The same holds for every function that calls this one and forwards its own arguments as reg or dm.
157 {
158 asm volatile("scfgwi %[value], %[dm] | %[reg]<<5\n" ::[value] "r"(value),
159 [ dm ] "i"(dm), [ reg ] "i"(reg));
160}