Skip to content

applications/src/kernels/nnlinear_baseline.h

Classes

Name
union v2s
union v4s
union v8s

Types

Name
typedef float v2f32 __attribute__((vector_size(8)))

Functions

Name
void SoftMax_baseline(float * activations, int length)
void FeedForward_baseline(float * image, float * activations, float * biases, float * weights)
void GradientUpdate_baseline(float * image, float * activations, float * biases, float * weights, float * W_gradients, float * b_gradients, uint32_t label, float * loss)
void TrainingStep_baseline(float * biases, float * weights, float * W_gradients, float * b_gradients, float learning_rate)

Defines

Name
NUM_CLASSES
IN_CH
BATCH_SIZE

Types Documentation

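typedefs v2f32, v4f16, v8f8

typedef float v2f32 __attribute__((vector_size(8)));
typedef __fp16 v4f16 __attribute__((vector_size(8)));
typedef char v8f8 __attribute__((vector_size(8)));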

Functions Documentation

function SoftMax_baseline

static inline void SoftMax_baseline(
    float * activations,
    int length
)

SoftMax calculation

function FeedForward_baseline

static inline void FeedForward_baseline(
    float * image,
    float * activations,
    float * biases,
    float * weights
)

FeedForward calculation

function GradientUpdate_baseline

static inline void GradientUpdate_baseline(
    float * image,
    float * activations,
    float * biases,
    float * weights,
    float * W_gradients,
    float * b_gradients,
    uint32_t label,
    float * loss
)

Gradient update calculation

function TrainingStep_baseline

static inline void TrainingStep_baseline(
    float * biases,
    float * weights,
    float * W_gradients,
    float * b_gradients,
    float learning_rate
)

Training step calculation

Macros Documentation

define NUM_CLASSES

#define NUM_CLASSES 10

Baseline kernels for single-core execution.

define IN_CH

#define IN_CH 784

define BATCH_SIZE

#define BATCH_SIZE 256

Source code

// Copyright 2020 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "math.h"
#include "printf.h"
#include "snrt.h"
#include "utils.h"

// 8-byte SIMD vector types declared with the GCC/Clang `vector_size`
// extension (the argument is the total vector size in bytes). The lane counts
// encoded in the names assume a 4-byte float and a 2-byte __fp16; char is
// 1 byte by definition.

// Vector of float elements, 8 bytes in total.
typedef float v2f32 __attribute__((vector_size(8)));
// Vector of __fp16 elements, 8 bytes in total.
typedef __fp16 v4f16 __attribute__((vector_size(8)));
// Vector of char elements, 8 bytes in total.
// NOTE(review): the name suggests the chars carry 8-bit float payloads --
// confirm against the users of this type.
typedef char v8f8 __attribute__((vector_size(8)));

// Overlays a double with a v2f32 vector so the same storage can be accessed
// either as one scalar (`f64`) or as a packed float vector (`vec`).
// NOTE(review): presumably used to move packed SIMD data through 64-bit FP
// registers -- confirm with the callers; assumes sizeof(double) == 8.
typedef union {
    double f64;
    v2f32 vec;
} v2s;
// Overlays a double with a v4f16 vector so the same storage can be accessed
// either as one scalar (`f64`) or as a packed __fp16 vector (`vec`).
// NOTE(review): assumes sizeof(double) == 8 so both members fully overlap.
typedef union {
    double f64;
    v4f16 vec;
} v4s;
// Overlays a double with a v8f8 vector so the same storage can be accessed
// either as one scalar (`f64`) or as a packed char vector (`vec`).
// NOTE(review): assumes sizeof(double) == 8 so both members fully overlap.
typedef union {
    double f64;
    v8f8 vec;
} v8s;

// Dimensions of the linear layer implemented by the kernels below.

// Number of outputs: length of the activations/biases vectors and number of
// rows in the weight and weight-gradient matrices.
#define NUM_CLASSES 10
// Number of inputs: length of the image vector and of each weight row.
// NOTE(review): 784 presumably corresponds to a flattened 28x28 image --
// confirm against the dataset in use.
#define IN_CH 784
// Divisor applied to the accumulated gradients in TrainingStep_baseline.
#define BATCH_SIZE 256

static inline void SoftMax_baseline(float *activations, int length) {
    // printf("============= SoftMax feedforward start =============\n");
    float sum = 0;
    float max = activations[0];
    int correct, predict = 0;

    // Get the maximum value of all activations
    for (int i = 1; i < length; i++) {
        if (activations[i] > max) {
            max = activations[i];
        }
    }

    // normalize
    for (int i = 0; i < length; i++) {
        activations[i] = exp(activations[i] - max);
        sum += activations[i];
    }

    // compute softmax activations
    for (int i = 0; i < length; i++) {
        activations[i] /= sum;
        // printf("activations[%d] = %f\n", i, activations[i]);
    }

    // printf("============= SoftMax feedforward end =============\n");

    // snrt_cluster_hw_barrier();
}

// Forward pass of the linear layer followed by softmax.
//
// For every class the bias is copied into activations[] and the dot product
// of that class's weight row with the input image is accumulated on top of
// it. The resulting NUM_CLASSES values are then normalised in place by
// SoftMax_baseline.
//
// image:       IN_CH input values (read only).
// activations: NUM_CLASSES outputs; overwritten.
// biases:      NUM_CLASSES bias values (read only).
// weights:     NUM_CLASSES rows of IN_CH values, row-major (read only).
static inline void FeedForward_baseline(float *image, float *activations,
                                        float *biases, float *weights) {
    for (int cls = 0; cls < NUM_CLASSES; ++cls) {
        // Start of the weight row that belongs to this class.
        float *w_row = &weights[cls * IN_CH];

        activations[cls] = biases[cls];
        for (int px = 0; px < IN_CH; ++px) {
            activations[cls] += w_row[px] * image[px];
        }
    }

    // Turn the raw scores into normalised softmax outputs.
    SoftMax_baseline(activations, NUM_CLASSES);
}

// Processes one sample: runs the forward pass, stores the sample's loss and
// adds its gradients to the running accumulators.
//
// The loss written to loss[0] is the negative logarithm of the activation of
// the labelled class. The per-class gradient term is the activation itself,
// minus one for the labelled class; it is added to b_gradients[] and, scaled
// by each input value, to the matching row of W_gradients[].
//
// image:       IN_CH input values (read only).
// activations: NUM_CLASSES outputs; overwritten by the forward pass.
// biases:      NUM_CLASSES bias values (read only).
// weights:     NUM_CLASSES x IN_CH weights, row-major (read only).
// W_gradients: NUM_CLASSES x IN_CH accumulator; incremented, not reset.
// b_gradients: NUM_CLASSES accumulator; incremented, not reset.
// label:       index of the labelled class; used unchecked as an index into
//              activations[].
// loss:        loss[0] receives the loss of this sample.
static inline void GradientUpdate_baseline(float *image, float *activations,
                                           float *biases, float *weights,
                                           float *W_gradients,
                                           float *b_gradients, uint32_t label,
                                           float *loss) {
    // The forward pass leaves the softmax outputs in activations[].
    FeedForward_baseline(image, activations, biases, weights);

    loss[0] = 0.0f - log(activations[label]);

    // NOTE(review): presumably synchronises the cores of the cluster before
    // the gradient accumulators are touched -- confirm in the runtime docs.
    snrt_cluster_hw_barrier();

    for (int cls = 0; cls < NUM_CLASSES; ++cls) {
        // Gradient term of this class; the labelled class gets "minus one".
        float delta = activations[cls];
        if (cls == label) {
            delta = activations[cls] - 1;
        }

        float *grad_row = &W_gradients[cls * IN_CH];
        for (int px = 0; px < IN_CH; ++px) {
            // Product and accumulation are kept as two separate statements.
            float contribution = delta * image[px];
            grad_row[px] += contribution;
        }

        b_gradients[cls] += delta;
    }

    snrt_cluster_hw_barrier();
}

// Applies one parameter update from the accumulated gradients.
//
// Every bias and weight is decreased by
//     learning_rate * accumulated_gradient / BATCH_SIZE.
// The gradient accumulators are only read here; they are not cleared.
//
// biases:        NUM_CLASSES values; updated in place.
// weights:       NUM_CLASSES x IN_CH values, row-major; updated in place.
// W_gradients:   NUM_CLASSES x IN_CH accumulated weight gradients.
// b_gradients:   NUM_CLASSES accumulated bias gradients.
// learning_rate: step size multiplied onto every gradient.
static inline void TrainingStep_baseline(float *biases, float *weights,
                                         float *W_gradients, float *b_gradients,
                                         float learning_rate) {
    for (int cls = 0; cls < NUM_CLASSES; ++cls) {
        biases[cls] -= learning_rate * b_gradients[cls] / BATCH_SIZE;

        // Matching rows of the weight matrix and of its gradient accumulator.
        float *w_row = weights + cls * IN_CH;
        float *g_row = W_gradients + cls * IN_CH;
        for (int px = 0; px < IN_CH; ++px) {
            w_row[px] -= learning_rate * g_row[px] / BATCH_SIZE;
        }
    }

    // NOTE(review): presumably makes the update visible to the other cores of
    // the cluster before they continue -- confirm in the runtime docs.
    snrt_cluster_hw_barrier();
}

Updated on 2023-06-19 at 09:43:56 +0000