/github/workspace/src/FilteringFunctions/plp_conv_i16.c

Functions

	Name
void	plp_conv_i16(const int16_t * pSrcA, const uint32_t srcALen, const int16_t * pSrcB, const uint32_t srcBLen, int32_t * pRes) Glue code for convolution of 16-bit integer vectors.

Attributes

	Name
int32_t *	_pRes1_16

Defines

	Name
	OLARATIO16

Functions Documentation

function plp_conv_i16

void plp_conv_i16(
    const int16_t * pSrcA,
    const uint32_t srcALen,
    const int16_t * pSrcB,
    const uint32_t srcBLen,
    int32_t * pRes
)

Glue code for convolution of 16-bit integer vectors.

Parameters:

pSrcA points to the first input vector
srcALen Length of the first input vector
pSrcB points to the second input vector
srcBLen Length of the second input vector
pRes points to the output buffer; srcALen + srcBLen - 1 result elements are written here

Return: none

Attributes Documentation

variable _pRes1_16

static int32_t * _pRes1_16;

Macros Documentation

define OLARATIO16

#define OLARATIO16 8

Source code

/* =====================================================================
 * Project:      PULP DSP Library
 * Title:        plp_conv_i16.c
 * Description:  16-bit integer convolution glue code
 *
 * $Date:        01. July 2019
 * $Revision:    V0
 *
 * Target Processor: PULP cores
 * ===================================================================== */
/*
 * Copyright (C) 2019 ETH Zurich and University of Bologna.
 *
 * Author: Moritz Scherer
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plp_math.h"

/* Numerator of the chunk-count heuristic in plp_conv_i16: the shorter input
 * is split into OLARATIO16 / (longer length / shorter length) chunks, with a
 * minimum of one chunk.
 * NOTE(review): "OLA" presumably stands for overlap-add - confirm. */
#define OLARATIO16 8
/* Scratch buffer holding one partial convolution result; it is allocated and
 * released inside every call to plp_conv_i16. */
static int32_t *_pRes1_16;

/**
 * @brief      Adds a partial convolution result onto the output vector.
 *
 * @param[in,out] pDst      destination slice inside the output vector
 * @param[in]     pPartial  partial result to be added
 * @param[in]     len       number of elements to add
 */
static void plp_conv_i16_accumulate(int32_t *pDst, const int32_t *pPartial, uint32_t len) {
    for (uint32_t n = 0; n < len; n++) {
        /* Sum in unsigned arithmetic so an overflow wraps around instead of
         * being undefined behaviour for signed integers. */
        pDst[n] = (int32_t)((uint32_t)pDst[n] + (uint32_t)pPartial[n]);
    }
}

/**
 * @brief      Glue code for convolution of 16-bit integer vectors.
 *
 * The shorter input is cut into chunks, every chunk is convolved with the
 * longer input into a scratch buffer, and the partial results are summed into
 * pRes at the offset of the chunk. The kernel is selected from the core the
 * call runs on (plp_conv_i16s_rv32im on the fabric controller,
 * plp_conv_i16s_xpulpv2 otherwise).
 *
 * @param[in]  pSrcA    points to the first input vector
 * @param[in]  srcALen  length of the first input vector
 * @param[in]  pSrcB    points to the second input vector
 * @param[in]  srcBLen  length of the second input vector
 * @param[out] pRes     output buffer, srcALen + srcBLen - 1 elements are written
 *
 * @return     none. If either length is zero the function returns without
 *             touching pRes.
 */
void plp_conv_i16(const int16_t *pSrcA,
                  const uint32_t srcALen,
                  const int16_t *pSrcB,
                  const uint32_t srcBLen,
                  int32_t *pRes) {

    /* An empty operand would divide by zero below and wrap the result length. */
    if (srcALen == 0 || srcBLen == 0) {
        return;
    }

    /* pIn1 / in1Len always describe the longer vector. */
    const int16_t *pIn1 = pSrcA;
    const int16_t *pIn2 = pSrcB;
    uint32_t in1Len = srcALen;
    uint32_t in2Len = srcBLen;

    if (srcALen < srcBLen) {
        pIn1 = pSrcB;
        pIn2 = pSrcA;
        in1Len = srcBLen;
        in2Len = srcALen;
    }

    /* Chunk-count heuristic: fewer chunks the more the lengths differ. */
    uint32_t nPE = OLARATIO16 / (in1Len / in2Len);
    if (nPE == 0) {
        nPE = 1;
    }

    /* Chunk length (rounded up). Because of the rounding, fewer chunks than
     * requested may already cover the short vector, so the count is derived
     * again from the chunk length. This guarantees that the last chunk starts
     * inside the vector and holds between 1 and src2Offset elements. */
    const uint32_t src2Offset = (in2Len + nPE - 1) / nPE;
    nPE = (in2Len + src2Offset - 1) / src2Offset;

    /* Largest partial result: one full chunk convolved with the long vector. */
    const uint32_t scratchLen = src2Offset + in1Len - 1;
    const uint32_t resLen = srcALen + srcBLen - 1;

    for (uint32_t i = 0; i < resLen; i++) {
        pRes[i] = 0;
    }

    const int onFc = (hal_cluster_id() == ARCHI_FC_CID);

    if (onFc) {
        _pRes1_16 = hal_fc_l1_malloc(sizeof(int32_t) * scratchLen);
    } else {
        _pRes1_16 = hal_cl_l1_malloc(sizeof(int32_t) * scratchLen);
    }

    if (!_pRes1_16) {
        /* No scratch memory available: run a single un-chunked convolution
         * that writes its complete result directly into the output buffer. */
        if (onFc) {
            plp_conv_i16s_rv32im(pIn1, in1Len, pIn2, in2Len, pRes);
        } else {
            plp_conv_i16s_xpulpv2(pIn1, in1Len, pIn2, in2Len, pRes);
        }
        return;
    }

    for (uint32_t i = 0; i < nPE; i++) {
        const uint32_t start = i * src2Offset;
        /* Every chunk is src2Offset long except possibly the last one. */
        const uint32_t chunkLen = (i == nPE - 1) ? (in2Len - start) : src2Offset;

        if (onFc) {
            plp_conv_i16s_rv32im(pIn1, in1Len, pIn2 + start, chunkLen, _pRes1_16);
        } else {
            plp_conv_i16s_xpulpv2(pIn1, in1Len, pIn2 + start, chunkLen, _pRes1_16);
        }

        /* The partial result belongs at the offset of its chunk. */
        plp_conv_i16_accumulate(pRes + start, _pRes1_16, chunkLen + in1Len - 1);
    }

    /* NOTE(review): the buffer is released with hal_cl_l1_free even when it
     * was obtained from hal_fc_l1_malloc (unchanged from the original code);
     * confirm against the HAL whether an FC-specific free is required. */
    hal_cl_l1_free(_pRes1_16, sizeof(int32_t) * scratchLen);
}

Updated on 2023-03-01 at 16:16:32 +0000