Skip to content

/github/workspace/src/FilteringFunctions/plp_conv_i32.c

Functions

Name
void plp_conv_i32(const int32_t * pSrcA, const uint32_t srcALen, const int32_t * pSrcB, const uint32_t srcBLen, int32_t *restrict pRes)
Glue code for convolution of 32-bit integer vectors.

Attributes

Name
int32_t * _pRes1_32

Defines

Name
OLARATIO32

Functions Documentation

function plp_conv_i32

void plp_conv_i32(
    const int32_t * pSrcA,
    const uint32_t srcALen,
    const int32_t * pSrcB,
    const uint32_t srcBLen,
    int32_t *__restrict__ pRes
)

Glue code for convolution of 32-bit integer vectors.

Parameters:

  • pSrcA points to the first input vector
  • srcALen Length of the first input vector
  • pSrcB points to the second input vector
  • srcBLen Length of the second input vector
  • pRes output result returned here

Return: none

Attributes Documentation

variable _pRes1_32

static int32_t * _pRes1_32;

Macros Documentation

define OLARATIO32

#define OLARATIO32 10

Source code

/* =====================================================================
 * Project:      PULP DSP Library
 * Title:        plp_conv_i32.c
 * Description:  32-bit integer convolution glue code
 *
 * $Date:        01. July 2019
 * $Revision:    V0
 *
 * Target Processor: PULP cores
 * ===================================================================== */
/*
 * Copyright (C) 2019 ETH Zurich and University of Bologna.
 *
 * Author: Moritz Scherer
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plp_math.h"
/* Numerator used by plp_conv_i32 to decide into how many chunks the shorter
 * input vector is split: the chunk count is OLARATIO32 divided by the integer
 * length ratio of the longer to the shorter input, so it never exceeds
 * OLARATIO32. */
#define OLARATIO32 10 // Eight is optimal in terms of overhead minimization
// NOTE(review): the comment above says eight, but the value is 10 - confirm which one is intended.

/* Scratch buffer holding one partial convolution result; it is allocated and
 * released inside plp_conv_i32 on every call. */
static int32_t *_pRes1_32;

/**
  @brief      Add a partial convolution result onto the output vector.
  @param[in,out] pDst  points to the first output element to accumulate into
  @param[in]  pPartial points to the partial result to add
  @param[in]  len      number of elements to add
  @return     none

  The sum is formed in unsigned arithmetic so that it wraps on overflow
  instead of being undefined; the original code had the same wrapping
  behaviour through its uint32_t temporaries.
*/
static void conv_i32_add_partial(int32_t *__restrict__ pDst,
                                 const int32_t *__restrict__ pPartial,
                                 uint32_t len) {
    for (uint32_t j = 0; j < len; j++) {
        pDst[j] = (int32_t)((uint32_t)pDst[j] + (uint32_t)pPartial[j]);
    }
}

/**
  @brief      Run the convolution kernel that matches the executing core.
  @param[in]  onFc    non-zero when running on the fabric controller
  @param[in]  pIn1    points to the first input vector
  @param[in]  in1Len  length of the first input vector
  @param[in]  pIn2    points to the second input vector
  @param[in]  in2Len  length of the second input vector
  @param[out] pDst    kernel output is written here
  @return     none
*/
static void conv_i32_run_kernel(int onFc,
                                const int32_t *pIn1,
                                uint32_t in1Len,
                                const int32_t *pIn2,
                                uint32_t in2Len,
                                int32_t *pDst) {
    if (onFc) {
        plp_conv_i32s_rv32im(pIn1, in1Len, pIn2, in2Len, pDst);
    } else {
        plp_conv_i32s_xpulpv2(pIn1, in1Len, pIn2, in2Len, pDst);
    }
}

/**
  @brief      Glue code for convolution of 32-bit integer vectors.
  @param[in]  pSrcA   points to the first input vector
  @param[in]  srcALen length of the first input vector
  @param[in]  pSrcB   points to the second input vector
  @param[in]  srcBLen length of the second input vector
  @param[out] pRes    output result returned here; srcALen + srcBLen - 1
                      elements are written
  @return     none

  The shorter input is cut into chunks, each chunk is convolved with the
  longer input into a scratch buffer, and the partial results are added onto
  pRes at the chunk's offset (overlap-add).

  If either input is empty the function returns without touching pRes.
  If the scratch buffer cannot be allocated, the whole convolution is
  computed by a single kernel call directly into pRes.
*/
void plp_conv_i32(const int32_t *pSrcA,
                  const uint32_t srcALen,
                  const int32_t *pSrcB,
                  const uint32_t srcBLen,
                  int32_t *__restrict__ pRes) {

    /* An empty input would divide by zero below and make the output length
       (srcALen + srcBLen - 1) wrap around. */
    if (srcALen == 0 || srcBLen == 0) {
        return;
    }

    /* pIn1/in1Len always describe the longer vector, pIn2/in2Len the shorter. */
    uint32_t in1Len, in2Len;
    const int32_t *pIn1;
    const int32_t *pIn2;

    if (srcALen >= srcBLen) {
        in1Len = srcALen;
        in2Len = srcBLen;
        pIn1 = pSrcA;
        pIn2 = pSrcB;
    } else {
        in2Len = srcALen;
        in1Len = srcBLen;
        pIn2 = pSrcA;
        pIn1 = pSrcB;
    }

    /* Requested number of chunks; at least one. */
    uint32_t nPE = (OLARATIO32 / (in1Len / in2Len));
    nPE = nPE > 0 ? nPE : 1;

    /* Chunk length (rounded up) of the shorter vector. */
    const uint32_t src2Offset = ((in2Len + nPE - 1) / nPE);

    /* Clamp the chunk count to what the rounded-up chunk length really needs.
       Without this, src2Offset * (nPE - 1) can reach or exceed in2Len (e.g.
       two inputs of equal length 5), which makes the last chunk length
       underflow and lets chunks run past the end of pIn2 and pRes. Whenever
       the original count was already consistent this leaves it unchanged. */
    nPE = (in2Len + src2Offset - 1) / src2Offset;

    const uint32_t resultsoffset = src2Offset + in1Len - 1; /* length of a full partial result */
    const uint32_t lastChunkLen = in2Len - (src2Offset * (nPE - 1));
    const uint32_t lastresultLen = lastChunkLen + in1Len - 1;

    for (uint32_t i = 0; i < srcALen + srcBLen - 1; i++) {
        pRes[i] = 0;
    }

    const int onFc = (hal_cluster_id() == ARCHI_FC_CID);

    if (onFc) {
        _pRes1_32 = hal_fc_l1_malloc(sizeof(int32_t) * (resultsoffset));
    } else {
        _pRes1_32 = hal_cl_l1_malloc(sizeof(int32_t) * (resultsoffset));
    }

    if (!_pRes1_32) {
        /* No scratch memory: skip the chunking and let the kernel write the
           complete result straight into the output vector. */
        conv_i32_run_kernel(onFc, pIn1, in1Len, pIn2, in2Len, pRes);
        return;
    }

    /* All chunks but the last have the full length src2Offset. */
    for (uint32_t i = 0; i < nPE - 1; i++) {
        conv_i32_run_kernel(onFc, pIn1, in1Len, pIn2 + i * src2Offset, src2Offset, _pRes1_32);
        conv_i32_add_partial(pRes + i * src2Offset, _pRes1_32, resultsoffset);
    }

    /* The last chunk holds whatever remains of the shorter vector. */
    conv_i32_run_kernel(onFc, pIn1, in1Len, pIn2 + (nPE - 1) * src2Offset, lastChunkLen,
                        _pRes1_32);
    conv_i32_add_partial(pRes + (nPE - 1) * src2Offset, _pRes1_32, lastresultLen);

    /* NOTE(review): as in the original, the buffer is released with
       hal_cl_l1_free even when it was obtained from hal_fc_l1_malloc -
       confirm that this is valid on the fabric controller. */
    hal_cl_l1_free(_pRes1_32, sizeof(int32_t) * (resultsoffset));
}

Updated on 2023-03-01 at 16:16:32 +0000