/github/workspace/src/BasicMathFunctions/mult/plp_mult_f32_parallel.c

Functions

	Name
void	plp_mult_f32_parallel(const float32_t restrict pSrcA, const float32_t restrict pSrcB, uint32_t blockSize, uint32_t nPE, float32_t *restrict pDst) Glue code for parallel dot product of 32-bit float vectors.

Functions Documentation

function plp_mult_f32_parallel

void plp_mult_f32_parallel(
    const float32_t *__restrict__ pSrcA,
    const float32_t *__restrict__ pSrcB,
    uint32_t blockSize,
    uint32_t nPE,
    float32_t *__restrict__ pDst
)

Glue code for parallel dot product of 32-bit float vectors.

Parameters:

pSrcA points to the first input vector
pSrcB points to the second input vector
blockSize number of samples in each vector
nPE number of parallel processing units
pDst points to output vector

Return: none

Source code

/* =====================================================================
 * Project:      PULP DSP Library
 * Title:        plp_mult_f32_parallel.c
 * Description:  32-bit float parallel multiplication glue code
 *
 * $Date:        10. July 2021
 * $Revision:    V0
 *
 * Target Processor: PULP cores
 * ===================================================================== */
/*
 * Copyright (C) 2021 ETH Zurich and University of Bologna.
 *
 * Author: Aron Szakacs, ETH Zurich
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Notice: project inspired by ARM CMSIS DSP and parts of source code
 * ported and adopted for RISC-V PULP platform from ARM CMSIS DSP
 * released under Copyright (C) 2010-2019 ARM Limited or its affiliates
 * with Apache-2.0.
 */

#include "plp_math.h"

void plp_mult_f32_parallel(const float32_t *__restrict__ pSrcA,
                               const float32_t *__restrict__ pSrcB,
                               uint32_t blockSize,
                               uint32_t nPE,
                               float32_t *__restrict__ pDst) {

    if (rt_cluster_id() == ARCHI_FC_CID) {
        printf("parallel processing supported only for cluster side\n");
        return;
    } else {

        uint32_t i, tmpblkSizePE = blockSize / nPE;

        plp_mult_instance_f32 S;

        // Initialize the plp_dot_prod_instance
        S.pSrcA = pSrcA;
        // printf("pSrcA[0] %d\n", pSrcA[0]);
        S.pSrcB = pSrcB;
        S.blkSizePE = tmpblkSizePE;
        S.nPE = nPE;
        S.pDst = pDst;

        // Fork the dot product to nPE cores (i.e. processing units)
        rt_team_fork(nPE, plp_mult_f32p_xpulpv2, (void *)&S);

        /* #if defined(PLP_MATH_LOOPUNROLL) */
        /* #undef PLP_MATH_LOOPUNROLL */
        /* #endif */

        /* #if defined(PLP_MATH_LOOPUNROLL) */
        /*     //uint32_t blkCnt = blockSize/nPE/2 * 2 * nPE; */
        /*     //printf("blkCnt %d\n", blkCnt); */
        /*     for (i= ((tmpblkSizePE>>1) <<1) * nPE; i<blockSize; i++){ */
        /*       sum += pSrcA[i] * pSrcB[i]; */
        /*     } */
        /* #else // PLP_MATH_LOOPUNROLL */
        for (i = (tmpblkSizePE)*nPE; i < blockSize; i++) {
            pDst[i] = pSrcA[i] * pSrcB[i];
        }
        /* #endif */

        /* #define PLP_MATH_LOOPUNROLL */
    }
}

Updated on 2023-03-01 at 16:16:32 +0000