/github/workspace/src/TransformFunctions/plp_dwt_f32_parallel.c

Functions

	Name
void	plp_dwt_f32_parallel(const float32_t *restrict pSrc, uint32_t length, const plp_dwt_wavelet_f32 wavelet, plp_dwt_extension_mode mode, uint32_t nPE, float32_t *restrict pDstA, float32_t *restrict pDstD) Parallel Floating-point DWT on real input data for XPULPV2 extension.
void	plp_dwt_dec_f32_parallel(const float32_t *restrict pSrc, uint32_t length, const plp_dwt_wavelet_f32 wavelet, plp_dwt_extension_mode mode, uint32_t level, uint32_t nPE, float32_t *restrict pTemp, float32_t *restrict pDst) Floating-point parallel n-level DWT for XPULPV2 extension.

Functions Documentation

function plp_dwt_f32_parallel

void plp_dwt_f32_parallel(
    const float32_t *__restrict__ pSrc,
    uint32_t length,
    const plp_dwt_wavelet_f32 wavelet,
    plp_dwt_extension_mode mode,
    uint32_t nPE,
    float32_t *__restrict__ pDstA,
    float32_t *__restrict__ pDstD
)

Parallel Floating-point DWT on real input data for XPULPV2 extension.

Parameters:

pSrc points to the input buffer (real data)
length length of input buffer
wavelet wavelet structure for calculating DWT
mode boundary extension mode
nPE Number of cores to use
pDstA points to output buffer with Approximate coefficients
pDstD points to output buffer with Detailed coefficients

Return: none

function plp_dwt_dec_f32_parallel

void plp_dwt_dec_f32_parallel(
    const float32_t *__restrict__ pSrc,
    uint32_t length,
    const plp_dwt_wavelet_f32 wavelet,
    plp_dwt_extension_mode mode,
    uint32_t level,
    uint32_t nPE,
    float32_t *__restrict__ pTemp,
    float32_t *__restrict__ pDst
)

Floating-point parallel n-level DWT for XPULPV2 extension.

Parameters:

pSrc points to the input buffer (real data)
length length of input buffer
wavelet wavelet structure for calculating DWT
mode boundary extension mode
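level Levels of Wavelet decomposition
nPE Number of cores to use
pTemp points to temporary buffer used to hold the intermediate Approximate coefficients between levels
pDst points to output buffer with Detailed coefficients and final approximate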

Return: none

Source code

/* ----------------------------------------------------------------------
 * Project:      PULP DSP Library
 * Title:        plp_dwt_f32_parallel.c
 * Description:  Floating-point Discrete Wavelet Transform
 *
 * $Date:        10. Juli 2021
 * $Revision:    V1
 *
 * Target Processor: PULP cores with "F" support (wolfe)
 * -------------------------------------------------------------------- */
/*
 * Copyright (C) 2021 ETH Zurich and University of Bologna. All rights reserved.
 *
 * Author: Jakub Mandula, ETH Zurich
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plp_math.h"
#include "plp_dwt_common.h"

/**
 * @brief Parallel floating-point single-level DWT on real input data for
 *        XPULPV2 extension.
 *
 * The function only dispatches work: it validates the arguments, packs them
 * into a plp_dwt_instance_f32 and forks a team of nPE cores on the kernel
 * matching the wavelet type. It prints a message and returns without forking
 * when
 *  - mode is (anti)reflect and length <= 1,
 *  - it is called from the fabric controller (ARCHI_FC_CID), or
 *  - the L1 buffers for the filter coefficients cannot be allocated.
 *
 * @param[in]  pSrc     points to the input buffer (real data)
 * @param[in]  length   length of input buffer
 * @param[in]  wavelet  wavelet structure for calculating DWT
 * @param[in]  mode     boundary extension mode
 * @param[in]  nPE      number of cores to use
 * @param[out] pDstA    points to output buffer with Approximate coefficients
 * @param[out] pDstD    points to output buffer with Detailed coefficients
 * @return     none
 */
void plp_dwt_f32_parallel(const float32_t *__restrict__ pSrc,
                 uint32_t length,
                 const plp_dwt_wavelet_f32 wavelet,
                 plp_dwt_extension_mode mode,
                 uint32_t nPE,
                 float32_t *__restrict__ pDstA,
                 float32_t *__restrict__ pDstD) {

   if((mode == PLP_DWT_MODE_ANTIREFLECT || mode == PLP_DWT_MODE_REFLECT) && length <= 1){
      printf("F Cannot run [anti]reflect mode on length 1 signal.\n");
      return;
   }

   if (hal_cluster_id() == ARCHI_FC_CID) {
      printf("parallel processing supported only for cluster side\n");
      return;
   }

   plp_dwt_instance_f32 args = {
      .pSrc = pSrc,
      .length = length,
      .wavelet = wavelet,
      .mode = mode,
      .nPE = nPE,
      .pDstA = pDstA,
      .pDstD = pDstD
   };

   switch(wavelet.type) {
   case PLP_DWT_WAVELET_HAAR:
   case PLP_DWT_WAVELET_DB1:
      // Haar/DB1 use a dedicated kernel and the caller's wavelet struct as-is.
      hal_cl_team_fork(nPE, plp_dwt_haar_f32p_xpulpv2, (void *)&args);
      break;
   default: {
      // Generic kernel: hand the workers a wavelet whose filter pointers refer
      // to buffers obtained from the cluster L1 allocator.
      const uint32_t coef_bytes = (uint32_t)(sizeof(float32_t) * (wavelet.length));

      float32_t *dec_hi_l1 = hal_cl_l1_malloc(coef_bytes);
      float32_t *dec_lo_l1 = hal_cl_l1_malloc(coef_bytes);

      // Never let copy_coefs_f32 or the workers touch a null buffer; release
      // whichever allocation did succeed so nothing leaks.
      if (!dec_hi_l1 || !dec_lo_l1) {
         printf("error: could not allocate L1 memory for wavelet coefficients\n");
         if (dec_hi_l1) {
            hal_cl_l1_free(dec_hi_l1, coef_bytes);
         }
         if (dec_lo_l1) {
            hal_cl_l1_free(dec_lo_l1, coef_bytes);
         }
         return;
      }

      copy_coefs_f32(dec_hi_l1, dec_lo_l1, wavelet);

      // Fields not listed here are zero-initialised by the compound literal.
      args.wavelet = (plp_dwt_wavelet_f32){
         .length = wavelet.length,
         .type = wavelet.type,
         .dec_hi = dec_hi_l1,
         .dec_lo = dec_lo_l1
      };

      hal_cl_team_fork(nPE, plp_dwt_f32p_xpulpv2, (void *)&args);

      hal_cl_l1_free(dec_hi_l1, coef_bytes);
      hal_cl_l1_free(dec_lo_l1, coef_bytes);
      break;
   }
   }

}




/**
 * @brief Floating-point parallel n-level DWT for XPULPV2 extension.
 *
 * Runs the single-level parallel kernel repeatedly. Each level consumes the
 * approximation coefficients of the previous one (the first level consumes
 * pSrc), appends its detail coefficients to pDst and writes its approximation
 * coefficients into one of two halves of pTemp, alternating between them.
 * After the last level the remaining approximation coefficients are appended
 * to pDst.
 *
 * The loop stops when either `level` levels were computed or the internal
 * `quotient` counter, initialised to (length / (wavelet.length - 1)) >> 1 and
 * halved after every level, reaches zero. Passing level == 0 makes the
 * unsigned counter wrap around, so only the quotient bound applies.
 *
 * The function prints a message and returns without computing anything when
 *  - mode is (anti)reflect and length <= 1,
 *  - it is called from the fabric controller (ARCHI_FC_CID),
 *  - wavelet.length == 1 (the quotient would divide by zero), or
 *  - the L1 buffers for the filter coefficients cannot be allocated.
 *
 * @param[in]  pSrc     points to the input buffer (real data)
 * @param[in]  length   length of input buffer
 * @param[in]  wavelet  wavelet structure for calculating DWT
 * @param[in]  mode     boundary extension mode
 * @param[in]  level    levels of wavelet decomposition (0: as many as the
 *                      quotient bound allows)
 * @param[in]  nPE      number of cores to use
 * @param[in]  pTemp    scratch buffer; split into two halves at offset
 *                      PLP_DWT_DEC_TEMP_LEN(length, wavelet.length)
 * @param[out] pDst     points to output buffer with Detailed coefficients and
 *                      final approximate
 * @return     none
 */
void plp_dwt_dec_f32_parallel(const float32_t *__restrict__ pSrc,
                     uint32_t length,
                     const plp_dwt_wavelet_f32 wavelet,
                     plp_dwt_extension_mode mode,
                     uint32_t level,
                     uint32_t nPE,
                     float32_t *__restrict__ pTemp,
                     float32_t *__restrict__ pDst){
   if((mode == PLP_DWT_MODE_ANTIREFLECT || mode == PLP_DWT_MODE_REFLECT) && length <= 1){
      printf("F Cannot run [anti]reflect mode on length 1 signal.\n");
      return;
   }

   if (hal_cluster_id() == ARCHI_FC_CID) {
      printf("error: FC doesn't have FPU\n");
      return;
   }

   // The level bound below divides by (wavelet.length - 1).
   if (wavelet.length == 1) {
      printf("error: wavelet length must be greater than 1\n");
      return;
   }

   uint32_t dst_offset = 0;
   uint32_t out_len = length;
   uint32_t in_len;
   uint32_t quotient = (out_len/(wavelet.length - 1)) >> 1;

   const uint32_t coef_bytes = (uint32_t)(sizeof(float32_t) * (wavelet.length));

   // Only assigned (and only freed) on the non-Haar path.
   float32_t *dec_hi_l1;
   float32_t *dec_lo_l1;

   // Remaining fields, including .wavelet on the Haar path, start out zeroed.
   plp_dwt_instance_f32 args = {
      .mode = mode,
      .nPE = nPE
   };

   switch(wavelet.type) {
      case PLP_DWT_WAVELET_HAAR:
      case PLP_DWT_WAVELET_DB1:
         break;
      default:
         dec_hi_l1 = hal_cl_l1_malloc(coef_bytes);
         dec_lo_l1 = hal_cl_l1_malloc(coef_bytes);

         // Never let copy_coefs_f32 or the workers touch a null buffer;
         // release whichever allocation did succeed so nothing leaks.
         if (!dec_hi_l1 || !dec_lo_l1) {
            printf("error: could not allocate L1 memory for wavelet coefficients\n");
            if (dec_hi_l1) {
               hal_cl_l1_free(dec_hi_l1, coef_bytes);
            }
            if (dec_lo_l1) {
               hal_cl_l1_free(dec_lo_l1, coef_bytes);
            }
            return;
         }

         copy_coefs_f32(dec_hi_l1, dec_lo_l1, wavelet);

         args.wavelet = (plp_dwt_wavelet_f32){
               .length = wavelet.length,
               .type = wavelet.type,
               .dec_hi = dec_hi_l1,
               .dec_lo = dec_lo_l1
         };
         break;
   }

   float32_t *pTempBuff1 = pTemp; // Receives the A coeffs of odd levels (1, 3, ...)
   float32_t *pTempBuff2 = pTemp + PLP_DWT_DEC_TEMP_LEN(length, wavelet.length); // Receives the A coeffs of even levels

   const float32_t *pS = pSrc;
   float32_t *pTempADst = pTempBuff1;

   do {
      in_len = out_len; // Get input length (previous output length)
      out_len = PLP_DWT_OUTPUT_LENGTH(out_len, wavelet.length); // Calculate new output length

      args.pSrc = pS;
      args.length = in_len;
      args.pDstA = pTempADst;
      args.pDstD = pDst + dst_offset;

      /* The signal (or previous approx. coeffs) are the input
       * Approx. coeffs go to the temp buffer that is not the current input
       * Detailed coeffs are appended to the dest buffer
       *
       * Level 1: pDst = [ D1 D1 D1 D1  x  x  x  x]  pTempADst(Buff1) = [A1 A1 A1 A1]
       * Level 2: pDst = [ D1 D1 D1 D1 D2 D2  x  x]  pTempADst(Buff2) = [A2 A2]
       * Level 3: pDst = [ D1 D1 D1 D1 D2 D2 D3  x]  pTempADst(Buff1) = [A3]
       */
      switch(wavelet.type) {
      case PLP_DWT_WAVELET_HAAR:
      case PLP_DWT_WAVELET_DB1:
         hal_cl_team_fork(nPE, plp_dwt_haar_f32p_xpulpv2, (void *)&args);
         break;
      default:
         hal_cl_team_fork(nPE, plp_dwt_f32p_xpulpv2, (void *)&args);
         break;
      }

      pS = pTempADst; // Next input will be the current Approx coeffs

      // Choose the other buffer as the next Approx buffer
      pTempADst = (pTempADst == pTempBuff1) ? pTempBuff2 : pTempBuff1;

      dst_offset += out_len;

      // If level was passed as 0 this wraps to UINT32_MAX on purpose, so the
      // loop is then limited by the quotient alone.
      level--;
   } while((quotient >>= 1) && (level > 0));

   // Append the last Approx coeffs (pS points at them) behind the details
   for(uint32_t i = 0; i < out_len; i++){
      pDst[dst_offset + i] = pS[i];
   }

   switch(wavelet.type) {
   case PLP_DWT_WAVELET_HAAR:
   case PLP_DWT_WAVELET_DB1:
      break;
   default:
      hal_cl_l1_free(dec_hi_l1, coef_bytes);
      hal_cl_l1_free(dec_lo_l1, coef_bytes);
      break;
   }
}

Updated on 2023-03-01 at 16:16:33 +0000