/github/workspace/src/FilteringFunctions/plp_conv_valid_rep_i16.c
Functions
Name | |
---|---|
void | plp_conv_valid_rep_i16(const int16_t * pSrcA, const uint32_t srcALen, const int16_t * pSrcB, const uint32_t srcBLen, int32_t * pRes) Glue code for convolution of 16-bit integer vectors in valid range. |
Functions Documentation
function plp_conv_valid_rep_i16
void plp_conv_valid_rep_i16(
const int16_t * pSrcA,
const uint32_t srcALen,
const int16_t * pSrcB,
const uint32_t srcBLen,
int32_t * pRes
)
Glue code for convolution of 16-bit integer vectors in valid range.
Parameters:
- pSrcA points to the first input vector, must be on L2
- srcALen Length of the first input vector
- pSrcB points to the second input vector, must be on L2
- srcBLen Length of the second input vector
- pRes output result returned here, of size |srcALen - srcBLen| + 1, preferably in L1
Return: none
Glue code for convolution (valid with replication) of 16-bit integer vectors.
Source code
/* =====================================================================
* Project: PULP DSP Library
* Title: plp_conv_valid_rep_i16.c
* Description: 16-bit integer convolution (valid with data replication)
* glue code
*
* $Date: 3. May 2020
* $Revision: V0
*
* Target Processor: PULP cores
* ===================================================================== */
/*
* Copyright (C) 2020 ETH Zurich and University of Bologna.
*
* Author: Moritz Scherer, Tibor Schneider
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plp_math.h"
/**
 * @brief Glue code for convolution (valid range, with data replication) of
 *        16-bit integer vectors.
 *
 * The longer of the two inputs is copied twice into cluster L1 memory (once
 * starting at element 0, once starting at element 1), the shorter input is
 * copied once, and the xpulpv2 kernel is then run on the local copies.
 *
 * @param[in]  pSrcA   points to the first input vector, must be on L2
 * @param[in]  srcALen length of the first input vector
 * @param[in]  pSrcB   points to the second input vector, must be on L2
 * @param[in]  srcBLen length of the second input vector
 * @param[out] pRes    output result returned here, of size
 *                     |srcALen - srcBLen| + 1, preferably in L1
 *
 * @return none. On the fabric controller the function only prints a
 *         "not implemented" message. If an input is empty or the L1
 *         allocation fails, pRes is left untouched.
 */
void plp_conv_valid_rep_i16(const int16_t *pSrcA,
                            const uint32_t srcALen,
                            const int16_t *pSrcB,
                            const uint32_t srcBLen,
                            int32_t *pRes) {

    uint32_t in1Len, in2Len;
    const int16_t *pIn1;
    const int16_t *pIn2;

    /* Order the operands so that pIn1/in1Len always describe the longer vector. */
    if (srcALen >= srcBLen) {
        in1Len = srcALen;
        in2Len = srcBLen;
        pIn1 = pSrcA;
        pIn2 = pSrcB;
    } else {
        in2Len = srcALen;
        in1Len = srcBLen;
        pIn2 = pSrcA;
        pIn1 = pSrcB;
    }

    if (hal_cluster_id() == ARCHI_FC_CID) {
        printf("Errorr: Not Implemented!");
    } else {

        /*
         * Nothing to convolve with an empty vector. This also keeps the
         * unsigned expression (in1Len - i) below from wrapping around when
         * in1Len is 0 (in2Len <= in1Len, so in2Len != 0 implies in1Len != 0).
         */
        if (in2Len == 0) {
            return;
        }

        /*
         * Data replication: copy 0 holds elements [0, in1Len) of the longer
         * vector, copy 1 holds elements [1, in1Len), i.e. the same data
         * shifted by one element. Each copy occupies len_align elements.
         */

        // round in1Len up to an even number of 16-bit elements, so that every
        // copy spans a whole number of 32-bit words
        uint32_t len_align = ((in1Len + 1) >> 1) << 1;
        uint32_t mem_size = len_align << 1; // memory size for both replications

        int16_t *p_1_loc = hal_cl_l1_malloc(sizeof(int16_t) * mem_size);
        int16_t *p_2_loc = hal_cl_l1_malloc(sizeof(int16_t) * in2Len);

        if (p_1_loc == NULL || p_2_loc == NULL) {
            printf("Error: insufficient L1 memory!\n");
            // release whichever buffer was successfully allocated, so that a
            // partial failure does not leak L1 memory
            if (p_1_loc != NULL) {
                hal_cl_l1_free(p_1_loc, sizeof(int16_t) * mem_size);
            }
            if (p_2_loc != NULL) {
                hal_cl_l1_free(p_2_loc, sizeof(int16_t) * in2Len);
            }
            return;
        }

        // copy the data over to L1; the longer vector is replicated 2 times
        hal_cl_dma_cmd_t copy;
        // NOTE(review): the first transfer is issued with merge = 0 and all
        // following ones with merge = 1, presumably so that the single wait
        // below covers every transfer -- confirm against the HAL documentation
        int merge = 0;
        for (int i = 0; i < 2; i++) {
            hal_cl_dma_cmd((unsigned int)(pIn1 + i), (unsigned int)(p_1_loc + i * len_align),
                           sizeof(int16_t) * (in1Len - i), HAL_CL_DMA_DIR_EXT2LOC, merge, &copy);
            merge = 1;
        }

        hal_cl_dma_cmd((unsigned int)pIn2, (unsigned int)p_2_loc, sizeof(int16_t) * in2Len,
                       HAL_CL_DMA_DIR_EXT2LOC, merge, &copy);

        // block until the DMA transfers have completed before touching the data
        hal_cl_dma_cmd_wait(&copy);

        plp_conv_valid_rep_i16s_xpulpv2(p_1_loc, in1Len, len_align, p_2_loc, in2Len, pRes);

        hal_cl_l1_free(p_1_loc, sizeof(int16_t) * mem_size);
        hal_cl_l1_free(p_2_loc, sizeof(int16_t) * in2Len);
    }
}
Updated on 2023-03-01 at 16:16:32 +0000