Skip to content

snRuntime/src/ssr.h

Types

Name
enum snrt_ssr_dm { SNRT_SSR_DM0 = 0, SNRT_SSR_DM1 = 1, SNRT_SSR_DM2 = 2, SNRT_SSR_DM_ALL = 31}
The different SSR data movers.
enum snrt_ssr_dim { SNRT_SSR_1D = 0, SNRT_SSR_2D = 1, SNRT_SSR_3D = 2, SNRT_SSR_4D = 3}
The supported loop-nest dimensionalities of a stream (1D to 4D).
enum @1 { REG_STATUS = 0, REG_REPEAT = 1, REG_BOUNDS = 2, REG_STRIDES = 6, REG_RPTR = 24, REG_WPTR = 28}
The SSR configuration registers.

Functions

Name
void snrt_fpu_fence()
Synchronize the integer and float pipelines.
void snrt_ssr_enable()
Enable SSR.
void snrt_ssr_disable()
Disable SSR.
uint32_t read_ssr_cfg(uint32_t reg, uint32_t dm)
void write_ssr_cfg(uint32_t reg, uint32_t dm, uint32_t value)
void snrt_ssr_loop_1d(enum snrt_ssr_dm dm, size_t b0, size_t s0)
void snrt_ssr_loop_2d(enum snrt_ssr_dm dm, size_t b0, size_t b1, size_t s0, size_t s1)
void snrt_ssr_loop_3d(enum snrt_ssr_dm dm, size_t b0, size_t b1, size_t b2, size_t s0, size_t s1, size_t s2)
void snrt_ssr_loop_4d(enum snrt_ssr_dm dm, size_t b0, size_t b1, size_t b2, size_t b3, size_t s0, size_t s1, size_t s2, size_t s3)
void snrt_ssr_repeat(enum snrt_ssr_dm dm, size_t count)
Configure the repetition count for a stream.
void snrt_ssr_read(enum snrt_ssr_dm dm, enum snrt_ssr_dim dim, volatile void * ptr)
Start a streaming read.
void snrt_ssr_write(enum snrt_ssr_dm dm, enum snrt_ssr_dim dim, volatile void * ptr)
Start a streaming write.

Types Documentation

enum snrt_ssr_dm

Enumerator Value Description
SNRT_SSR_DM0 0
SNRT_SSR_DM1 1
SNRT_SSR_DM2 2
SNRT_SSR_DM_ALL 31

The different SSR data movers.

enum snrt_ssr_dim

Enumerator Value Description
SNRT_SSR_1D 0
SNRT_SSR_2D 1
SNRT_SSR_3D 2
SNRT_SSR_4D 3

The supported loop-nest dimensionalities of a stream (1D to 4D).

enum @1

Enumerator Value Description
REG_STATUS 0
REG_REPEAT 1
REG_BOUNDS 2
REG_STRIDES 6
REG_RPTR 24
REG_WPTR 28

The SSR configuration registers.

Functions Documentation

function snrt_fpu_fence

inline void snrt_fpu_fence()

Synchronize the integer and float pipelines.

function snrt_ssr_enable

inline void snrt_ssr_enable()

Enable SSR.

function snrt_ssr_disable

inline void snrt_ssr_disable()

Disable SSR.

function read_ssr_cfg

inline uint32_t read_ssr_cfg(
    uint32_t reg,
    uint32_t dm
)

function write_ssr_cfg

inline void write_ssr_cfg(
    uint32_t reg,
    uint32_t dm,
    uint32_t value
)

function snrt_ssr_loop_1d

inline void snrt_ssr_loop_1d(
    enum snrt_ssr_dm dm,
    size_t b0,
    size_t s0
)

function snrt_ssr_loop_2d

inline void snrt_ssr_loop_2d(
    enum snrt_ssr_dm dm,
    size_t b0,
    size_t b1,
    size_t s0,
    size_t s1
)

function snrt_ssr_loop_3d

inline void snrt_ssr_loop_3d(
    enum snrt_ssr_dm dm,
    size_t b0,
    size_t b1,
    size_t b2,
    size_t s0,
    size_t s1,
    size_t s2
)

function snrt_ssr_loop_4d

inline void snrt_ssr_loop_4d(
    enum snrt_ssr_dm dm,
    size_t b0,
    size_t b1,
    size_t b2,
    size_t b3,
    size_t s0,
    size_t s1,
    size_t s2,
    size_t s3
)

function snrt_ssr_repeat

inline void snrt_ssr_repeat(
    enum snrt_ssr_dm dm,
    size_t count
)

Configure the repetition count for a stream.

function snrt_ssr_read

inline void snrt_ssr_read(
    enum snrt_ssr_dm dm,
    enum snrt_ssr_dim dim,
    volatile void * ptr
)

Start a streaming read.

function snrt_ssr_write

inline void snrt_ssr_write(
    enum snrt_ssr_dm dm,
    enum snrt_ssr_dim dim,
    volatile void * ptr
)

Start a streaming write.

Source code

// Copyright 2023 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// Synchronize the integer and float pipelines.
//
// fmv.x.w copies fa0 into an integer scratch register and the following mv
// reads that register back, so the integer side consumes a value produced by
// the FPU. The "memory" clobber additionally keeps the compiler from moving
// memory accesses across the fence.
inline void snrt_fpu_fence() {
    unsigned tmp;
    // tmp is written by fmv.x.w before mv reads it, so it is a pure output
    // ("=r"); a read-write constraint would consume an uninitialised value.
    asm volatile(
        "fmv.x.w %0, fa0\n"
        "mv      %0, %0\n"
        : "=r"(tmp)::"memory");
}

// The different SSR data movers.
// The enumerator value is the data-mover index that read_ssr_cfg and
// write_ssr_cfg place in the low bits of the scfgri/scfgwi immediate.
enum snrt_ssr_dm {
    SNRT_SSR_DM0 = 0,
    SNRT_SSR_DM1 = 1,
    SNRT_SSR_DM2 = 2,
    // To write to all SSRs, use index 31
    SNRT_SSR_DM_ALL = 31,
};

// The different loop-nest dimensionalities of a stream.
// The value is the number of dimensions minus one; snrt_ssr_read and
// snrt_ssr_write add it to REG_RPTR / REG_WPTR to select the pointer
// register they write.
enum snrt_ssr_dim {
    SNRT_SSR_1D = 0,
    SNRT_SSR_2D = 1,
    SNRT_SSR_3D = 2,
    SNRT_SSR_4D = 3,
};

// The SSR configuration registers, used as the `reg` argument of
// read_ssr_cfg / write_ssr_cfg. Entries annotated with "+ ..." are the base
// index of a register group; add the stated offset to address one register.
enum {
    REG_STATUS = 0,
    REG_REPEAT = 1,
    REG_BOUNDS = 2,   // + loop index
    REG_STRIDES = 6,  // + loop index
    REG_RPTR = 24,    // + snrt_ssr_dim
    REG_WPTR = 28,    // + snrt_ssr_dim
};

// Enable SSR.
// When __TOOLCHAIN_LLVM__ is defined the compiler builtin is used; otherwise
// bit 0 of CSR 0x7C0 is set directly with csrsi.
inline void snrt_ssr_enable() {
#ifdef __TOOLCHAIN_LLVM__
    __builtin_ssr_enable();
#else
    asm volatile("csrsi 0x7C0, 1\n");
#endif
}

// Disable SSR.
// When __TOOLCHAIN_LLVM__ is defined the compiler builtin is used; otherwise
// bit 0 of CSR 0x7C0 is cleared directly with csrci.
inline void snrt_ssr_disable() {
#ifdef __TOOLCHAIN_LLVM__
    __builtin_ssr_disable();
#else
    asm volatile("csrci 0x7C0, 1\n");
#endif
}

// Read an SSR configuration register.
// reg: register index (one of the REG_* constants, plus offset if any)
// dm:  data mover index (enum snrt_ssr_dm value)
// Returns the value produced by the scfgri instruction.
//
// Both reg and dm are bound with the "i" (immediate) constraint and combined
// into the instruction immediate as `dm | reg << 5`, so they must be
// compile-time constants at the point where this function is inlined.
inline uint32_t read_ssr_cfg(uint32_t reg, uint32_t dm) {
    uint32_t value;
    asm volatile("scfgri %[value], %[dm] | %[reg]<<5\n"
                 : [ value ] "=r"(value)
                 : [ dm ] "i"(dm), [ reg ] "i"(reg));
    return value;
}

// Write an SSR configuration register.
// reg:   register index (one of the REG_* constants, plus offset if any)
// dm:    data mover index (enum snrt_ssr_dm value)
// value: value passed in a general-purpose register to scfgwi
//
// Both reg and dm are bound with the "i" (immediate) constraint and combined
// into the instruction immediate as `dm | reg << 5`, so they must be
// compile-time constants at the point where this function is inlined.
inline void write_ssr_cfg(uint32_t reg, uint32_t dm, uint32_t value) {
    asm volatile("scfgwi %[value], %[dm] | %[reg]<<5\n" ::[value] "r"(value),
                 [ dm ] "i"(dm), [ reg ] "i"(reg));
}

// Configure an SSR data mover for a 1D loop nest.
// dm: data mover to configure
// b0: bound (limit) of the loop; the bound register receives b0 - 1
// s0: increment size of the loop; written to the stride register unchanged,
//     since there is no inner loop whose travel must be subtracted
inline void snrt_ssr_loop_1d(enum snrt_ssr_dm dm, size_t b0, size_t s0) {
    write_ssr_cfg(REG_BOUNDS + 0, dm, b0 - 1);
    write_ssr_cfg(REG_STRIDES + 0, dm, s0);
}

// Configure an SSR data mover for a 2D loop nest.
// dm: data mover to configure
// b0: inner-most bound (limit of loop)
// b1: outer-most bound (limit of loop)
// s0: increment size of the inner-most loop
// s1: increment size of the outer-most loop
//
// Each bound register receives the bound minus one. Each stride register
// receives the requested increment minus the total distance covered by all
// inner loops (sum of s_i * (b_i - 1)).
inline void snrt_ssr_loop_2d(enum snrt_ssr_dm dm, size_t b0, size_t b1,
                             size_t s0, size_t s1) {
    const size_t n0 = b0 - 1;
    const size_t n1 = b1 - 1;
    write_ssr_cfg(REG_BOUNDS + 0, dm, n0);
    write_ssr_cfg(REG_BOUNDS + 1, dm, n1);
    write_ssr_cfg(REG_STRIDES + 0, dm, s0);
    // Subtract the distance travelled by the inner loop.
    write_ssr_cfg(REG_STRIDES + 1, dm, s1 - s0 * n0);
}

// Configure an SSR data mover for a 3D loop nest.
// dm: data mover to configure
// b0: inner-most bound (limit of loop)
// b2: outer-most bound (limit of loop)
// s0: increment size of the inner-most loop
// s2: increment size of the outer-most loop
//
// Each bound register receives the bound minus one. Each stride register
// receives the requested increment minus the total distance covered by all
// inner loops (sum of s_i * (b_i - 1)).
inline void snrt_ssr_loop_3d(enum snrt_ssr_dm dm, size_t b0, size_t b1,
                             size_t b2, size_t s0, size_t s1, size_t s2) {
    const size_t n0 = b0 - 1;
    const size_t n1 = b1 - 1;
    const size_t n2 = b2 - 1;
    write_ssr_cfg(REG_BOUNDS + 0, dm, n0);
    write_ssr_cfg(REG_BOUNDS + 1, dm, n1);
    write_ssr_cfg(REG_BOUNDS + 2, dm, n2);
    write_ssr_cfg(REG_STRIDES + 0, dm, s0);
    // Running total of the distance travelled by the loops configured so far.
    size_t inner_span = s0 * n0;
    write_ssr_cfg(REG_STRIDES + 1, dm, s1 - inner_span);
    inner_span += s1 * n1;
    write_ssr_cfg(REG_STRIDES + 2, dm, s2 - inner_span);
}

// Configure an SSR data mover for a 4D loop nest.
// dm: data mover to configure
// b0: Inner-most bound (limit of loop)
// b3: Outer-most bound (limit of loop)
// s0: increment size of inner-most loop
// s3: increment size of outer-most loop
//
// Each bound register receives the bound minus one. Each stride register
// receives the requested increment minus the total distance covered by all
// inner loops (sum of s_i * (b_i - 1)).
inline void snrt_ssr_loop_4d(enum snrt_ssr_dm dm, size_t b0, size_t b1,
                             size_t b2, size_t b3, size_t s0, size_t s1,
                             size_t s2, size_t s3) {
    const size_t n0 = b0 - 1;
    const size_t n1 = b1 - 1;
    const size_t n2 = b2 - 1;
    const size_t n3 = b3 - 1;
    write_ssr_cfg(REG_BOUNDS + 0, dm, n0);
    write_ssr_cfg(REG_BOUNDS + 1, dm, n1);
    write_ssr_cfg(REG_BOUNDS + 2, dm, n2);
    write_ssr_cfg(REG_BOUNDS + 3, dm, n3);
    write_ssr_cfg(REG_STRIDES + 0, dm, s0);
    // Running total of the distance travelled by the loops configured so far.
    size_t inner_span = s0 * n0;
    write_ssr_cfg(REG_STRIDES + 1, dm, s1 - inner_span);
    inner_span += s1 * n1;
    write_ssr_cfg(REG_STRIDES + 2, dm, s2 - inner_span);
    inner_span += s2 * n2;
    write_ssr_cfg(REG_STRIDES + 3, dm, s3 - inner_span);
}

// Configure the repetition count for a stream.
// dm:    data mover to configure
// count: repetition count; REG_REPEAT is programmed with count - 1
inline void snrt_ssr_repeat(enum snrt_ssr_dm dm, size_t count) {
    const size_t encoded_count = count - 1;
    write_ssr_cfg(REG_REPEAT, dm, encoded_count);
}

// Start a streaming read.
// dm:  data mover to use
// dim: dimensionality of the stream; selects register REG_RPTR + dim
// ptr: address written to the selected read-pointer register
inline void snrt_ssr_read(enum snrt_ssr_dm dm, enum snrt_ssr_dim dim,
                          volatile void *ptr) {
    const uintptr_t base_addr = (uintptr_t)ptr;
    write_ssr_cfg(REG_RPTR + dim, dm, base_addr);
}

// Start a streaming write.
// dm:  data mover to use
// dim: dimensionality of the stream; selects register REG_WPTR + dim
// ptr: address written to the selected write-pointer register
inline void snrt_ssr_write(enum snrt_ssr_dm dm, enum snrt_ssr_dim dim,
                           volatile void *ptr) {
    const uintptr_t base_addr = (uintptr_t)ptr;
    write_ssr_cfg(REG_WPTR + dim, dm, base_addr);
}

Updated on 2023-06-19 at 09:43:56 +0000