/github/workspace/src/FilteringFunctions/plp_conv_valid_rep_i8.c

Functions

	Name
void	plp_conv_valid_rep_i8(const int8_t * pSrcA, const uint32_t srcALen, const int8_t * pSrcB, const uint32_t srcBLen, int32_t * pRes) Glue code for convolution of 8-bit integer vectors in valid range.

Functions Documentation

function plp_conv_valid_rep_i8

void plp_conv_valid_rep_i8(
    const int8_t * pSrcA,
    const uint32_t srcALen,
    const int8_t * pSrcB,
    const uint32_t srcBLen,
    int32_t * pRes
)

Glue code for convolution of 8-bit integer vectors in valid range.

Parameters:

pSrcA points to the first input vector, must be on L2
srcALen ength of the first input vector
pSrcB points to the second input vector, must be on L2
srcBLen Length of the second input vector
pRes output result returned here, of size |srcALen - srcBLen| + 1, preferably in L1

Return: none

Glue code for convolution (valid with data replication) of 8-bit integer vectors.

Source code

/* =====================================================================
 * Project:      PULP DSP Library
 * Title:        plp_dot_prod_i8.c
 * Description:  8-bit integer convolution (valid with data replication)
 *               glue code
 *
 * $Date:        24. April 2020
 * $Revision:    V0
 *
 * Target Processor: PULP cores
 * ===================================================================== */
/*
 * Copyright (C) 2020 ETH Zurich and University of Bologna.
 *
 * Author: Moritz Scherer, Tibor Schneider
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plp_math.h"

void plp_conv_valid_rep_i8(const int8_t *pSrcA,
                           const uint32_t srcALen,
                           const int8_t *pSrcB,
                           const uint32_t srcBLen,
                           int32_t *pRes) {

    uint32_t in1Len, in2Len;
    const int8_t *pIn1;
    const int8_t *pIn2;

    if (srcALen >= srcBLen) {
        in1Len = srcALen;
        in2Len = srcBLen;
        pIn1 = pSrcA;
        pIn2 = pSrcB;
    } else {
        in2Len = srcALen;
        in1Len = srcBLen;
        pIn2 = pSrcA;
        pIn1 = pSrcB;
    }

    if (hal_cluster_id() == ARCHI_FC_CID) {

        printf("Errorr: Not Implemented!");

    } else {

        /*
         * because of data replication, the first copy starts at index 0, and must go up to index
         * in1Len - 4, therefore, it is in1Len - 3 long. The last copy starts at index 3 and goes
         * up to index in1Len - 1.
         */

        // compute required memory size
        uint32_t len_align = ((in1Len + 3) >> 2) << 2; // compute aligned memory size
        uint32_t mem_size = len_align << 2;            // memory size for all 4 replications

        int8_t *p_1_loc = hal_cl_l1_malloc(sizeof(int8_t) * mem_size);
        int8_t *p_2_loc = hal_cl_l1_malloc(sizeof(int8_t) * in2Len);

        if (p_1_loc == NULL || p_2_loc == NULL) {
            printf("Error: insufficient L1 memory!\n");
            return;
        }

        // copy the data over to the L1 data, replicated 4 times
        hal_cl_dma_cmd_t copy;
        int merge = 0;

        for (int i = 0; i < 4; i++) {
            hal_cl_dma_cmd((unsigned int)(pIn1 + i), (unsigned int)(p_1_loc + i * len_align),
                          sizeof(int8_t) * (in1Len - i), HAL_CL_DMA_DIR_EXT2LOC, merge, &copy);
            merge = 1;
        }

        hal_cl_dma_cmd((unsigned int)pIn2, (unsigned int)p_2_loc, sizeof(int8_t) * in2Len,
                      HAL_CL_DMA_DIR_EXT2LOC, merge, &copy);

        hal_cl_dma_cmd_wait(&copy);

        plp_conv_valid_rep_i8s_xpulpv2(p_1_loc, in1Len, len_align, p_2_loc, in2Len, pRes);

        hal_cl_l1_free(p_1_loc, sizeof(int8_t) * mem_size);
        hal_cl_l1_free(p_2_loc, sizeof(int8_t) * in2Len);
    }
}

Updated on 2023-03-01 at 16:16:32 +0000