/github/workspace/src/FilteringFunctions/kernels/plp_conv_parallel_OLA.c
Functions
Name | |
---|---|
void | plp_conv_parallel_OLA(uint32_t nPE, uint32_t srcALen, uint32_t srcBLen, int32_t * resultsBuffer) Helper function for parallelized overlap-adding of partial convolution results. |
Functions Documentation
function plp_conv_parallel_OLA
void plp_conv_parallel_OLA(
uint32_t nPE,
uint32_t srcALen,
uint32_t srcBLen,
int32_t * resultsBuffer
)
Helper function for parallelized overlap-adding of partial convolution results.
Parameters:
- nPE Number of processing cores
- srcALen Length of the first original input vector
- srcBLen Length of the second original input vector
- resultsBuffer The results buffer array produced by plp_conv_i[XX]_parallel
Return: none
Source code
/* =====================================================================
* Project: PULP DSP Library
* Title: plp_conv_parallel_OLA.c
* Description: Parallel integer convolution for XPULPV2 using OLA method
*
* $Date: 01. July 2019
* $Revision: V0
*
* Target Processor: PULP cores
* ===================================================================== */
/*
* Copyright (C) 2019 ETH Zurich and University of Bologna.
*
* Author: Moritz Scherer, ETH Zurich
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "plp_math.h"
#include "rtos_hal.h"
/**
 * @brief Helper function for parallelized overlap-adding of partial
 *        convolution results.
 *
 * The partial results in resultsBuffer are combined pairwise: in every round
 * plp_conv_parallel_OLA_kernel is forked on the cluster, after which the
 * number of remaining vectors is reduced to ceil(numVectors / 2) and the
 * block/add offsets are doubled. The rounds repeat until one vector is left.
 *
 * @param[in]     nPE            Number of processing cores (equals the number
 *                               of partial result vectors). For nPE < 2 there
 *                               is nothing to combine and the function returns
 *                               without forking.
 * @param[in]     srcALen        Length of the first original input vector
 * @param[in]     srcBLen        Length of the second original input vector
 * @param[in,out] resultsBuffer  resultsBuffer array from
 *                               plp_conv_i[XX]_parallel; handed to the kernel
 *                               through the instance structure.
 *
 * @return none
 *
 * NOTE(review): the tail length below uses unsigned arithmetic and wraps if
 * srcAoffset * (nPE - 1) exceeds srcALen - presumably callers guarantee
 * srcALen is large enough relative to nPE; confirm against the callers.
 */
void plp_conv_parallel_OLA(uint32_t nPE,
                           uint32_t srcALen,
                           uint32_t srcBLen,
                           int32_t *resultsBuffer) {

    /* nPE == 0 would divide by zero below, and with a single vector no
     * addition round is ever executed, so bail out before touching anything. */
    if (nPE < 2U) {
        return;
    }

    /* Number of srcA samples per core (rounded up) and the length of the
     * partial convolution result computed from such a slice. */
    uint32_t srcAoffset = (srcALen + nPE - 1U) / nPE;
    uint32_t resultsoffset = srcAoffset + srcBLen - 1U;

    /* Length of the partial result belonging to the last (shorter) slice. */
    uint32_t lastResultLen = (srcALen - (srcAoffset * (nPE - 1U))) + srcBLen - 1U;

    uint32_t remainingcycles = nPE;
    uint32_t participants = nPE >> 1;

    /* coresPerVector is derived from nPE directly: the structure must not be
     * read inside its own initializer, because the initializer expressions
     * are indeterminately sequenced and S.numVectors may not be set yet. */
    plp_conv_tree_add_instance S = { .addOffset = srcAoffset,
                                     .addLengthfirst = resultsoffset,
                                     .addLengthsecond = lastResultLen,
                                     .numVectors = nPE,
                                     .pRes = resultsBuffer,
                                     .blockOffset = resultsoffset,
                                     .coresPerVector = 2U * (nPE / ((nPE >> 1) << 1)) };

    while (remainingcycles > 1U) {

        /* One team of coresPerVector cores per pair of vectors. */
        hal_cl_team_fork((S.coresPerVector * (S.numVectors >> 1)), plp_conv_parallel_OLA_kernel,
                         (void *)&S);

        /* Each pair collapses into one vector; an odd vector is carried over. */
        S.numVectors = S.numVectors - participants;

        /* The surviving vectors are twice as far apart and cover one more
         * addOffset worth of samples. */
        S.blockOffset *= 2;
        S.addLengthfirst = S.addLengthfirst + S.addOffset;
        S.addOffset *= 2;

        remainingcycles = (remainingcycles + 1U) >> 1;
        participants = S.numVectors >> 1;

        /* Re-distribute the cores over the remaining pairs. When no pair is
         * left the loop terminates, and the divisor would be zero, so the
         * update is skipped. */
        if (participants > 0U) {
            S.coresPerVector = ((2U * nPE) / ((S.numVectors >> 1) << 1));
        }
    }
}
Updated on 2023-03-01 at 16:16:32 +0000