/***************************************************************************
    File        : FIRFilter.cpp
    Description : Implements class FIRFilter
 ---------------------------------------------------------------------------
    Begin       : Mon Sep 3 2001
    Author(s)   : Roberto Grosso
 ***************************************************************************/

#include "FIRFilter.h"



// Auxiliary functions
//   ComputeBlockSize (forward declaration, defined further below in this file)
//   decides how a signal of sgSize samples is split when it is filtered with
//   a filter of ftSize samples:
//     noSegm : (out) number of segments
//     llSize : (out) number of signal samples per segment
inline void
ComputeBlockSize(const unsigned int sgSize,const unsigned int ftSize,unsigned int& noSegm,unsigned int& llSize);

// Method: Filter
//   Computes the linear convolution of signal with the impulse response
//   (filter) response and stores it in answer.
//
//   signal   : input samples, must not be empty
//   response : impulse response of the filter, must not be empty
//   answer   : (out) resized to signal.size() + response.size() - 1 and
//              completely overwritten
//
//   If ComputeBlockSize() asks for a single segment the whole signal is
//   convolved with one FFT. Otherwise the overlap-add method is used:
//   the signal is cut into noSegm segments of llSize samples, every segment
//   is convolved with the filter and the results are summed into answer at
//   the position of the segment. Samples left over after the last complete
//   segment are convolved separately and summed in the same way.
//
//   Returns false (after reporting through Singleton::AppendMessage) if
//   signal or response is empty, true otherwise.
bool
gwd::FIRFilter::Filter(Vector& signal,Vector& response,Vector& answer)
{
  if (signal.empty() || response.empty())
  {
    Singleton* single = Singleton::exemplar();
    single->AppendMessage("gwd::FIRFilter::Filter(): empty signal or impulse response");
    return false;
  }

  typedef Vector::size_type SizeType;
  typedef unsigned int uint;

  // Array sizes
  const SizeType sgSize = signal.size();       // size of input signal
  const SizeType ftSize = response.size();     // size of filter
  const SizeType anSize = sgSize + ftSize - 1; // size of answer
  const SizeType ovSize = ftSize - 1;          // overlap size

  // Compute segment size and number of segments
  unsigned int llSize = 0;
  unsigned int noSegm = 0;
  ComputeBlockSize(static_cast<uint>(sgSize),static_cast<uint>(ftSize),noSegm,llSize);

  if (noSegm == 1)
  {
    // One segment only: a single convolution of the whole signal.
    // Work on arrays of size 2^n that can hold the complete answer.
    SizeType fftSize,fftN;
    CeilPowerOf2<SizeType>(anSize,fftSize,fftN);
    const SizeType NN = fftSize / 2 + 1;

    // Zero padded copies of signal and filter
    Vector s(fftSize);
    std::copy(signal.begin(),signal.end(),s.begin());
    std::fill(s.begin()+sgSize,s.end(),double(0));
    Vector h(fftSize);
    std::copy(response.begin(),response.end(),h.begin());
    std::fill(h.begin()+ftSize,h.end(),double(0));

    // Fourier Transform
    std::vector<Complex> cSignal(NN);
    std::vector<Complex> cFilter(NN);
    Fourier fourier;
    fourier.dft(s,cSignal);
    fourier.dft(h,cFilter);

    // Convolution is a product in the frequency domain; the division by
    // fftSize is the normalization of the transform pair
    const double factor = static_cast<double>(fftSize);
    for (SizeType nn = 0; nn < cSignal.size(); nn++)
    {
      cSignal[nn] *= cFilter[nn];
      cSignal[nn] /= factor;
    }

    // Transform to time domain
    fourier.dft(cSignal,s);

    // Create answer
    answer.resize(anSize);
    std::copy(s.begin(),s.begin()+anSize,answer.begin());
  }
  else
  { // use overlap-add method
    // Create the answer array. It has to be zeroed explicitly: all
    // contributions below are accumulated with += and resize() keeps the
    // old samples of a vector that is reused by the caller.
    answer.resize(anSize);
    std::fill(answer.begin(),answer.end(),double(0));

    // create a buffer for the segments; it holds the llSize input samples
    // and, after the convolution, the llSize+ovSize output samples
    Vector segment(llSize+ovSize);
    std::fill(segment.begin(),segment.end(),double(0));

    // Initialize convolution
    Convolution convl;
    convl.Init(static_cast<uint>(llSize+ovSize),segment,response);

    // Main loop over the segments
    Vector::iterator p;
    Vector::iterator q;
    for (unsigned int ll = 0; ll < noSegm; ll++)
    {
      // copy from signal at ll*llSize until (ll+1)*llSize to segment
      std::copy(signal.begin() + ll*llSize,signal.begin() + (ll+1)*llSize,segment.begin());
      std::fill(segment.begin()+llSize,segment.end(),double(0));
      // carry out convolution
      convl.ComputeConvolution();

      // Accumulate the complete result of this segment. Its last ovSize
      // samples overlap the following segments; summing instead of
      // copying keeps them intact even if the overlap is longer than one
      // segment (filter longer than llSize).
      // Last write index is (ll+1)*llSize + ovSize - 1 < anSize.
      q = answer.begin()+ll*llSize;
      for (p = segment.begin(); p != segment.end(); ++p,++q) *q += *p;
    } // for ll noSegm

    // Process the remaining part of the signal of size
    // sgSize - noSegm*llSize
    const SizeType done      = static_cast<SizeType>(noSegm)*llSize;
    const SizeType remaining = sgSize - done;
    if (remaining > 0)
    {
      // The convolution of the remaining samples fills the answer up to
      // its end: remaining + ovSize == anSize - done
      const SizeType tailSize = remaining + ovSize;

      // Introduce a vector with size 2^N for better fft performance.
      // tailSize >= ftSize because remaining >= 1, so the filter fits too.
      SizeType fftSize,fftNN;
      CeilPowerOf2<SizeType>(tailSize,fftSize,fftNN);

      // Create auxiliary arrays; zero everything behind the copied data
      Vector s(fftSize);
      std::copy(signal.begin()+done,signal.end(),s.begin());
      std::fill(s.begin()+remaining,s.end(),double(0));
      Vector h(fftSize);
      std::copy(response.begin(),response.end(),h.begin());
      std::fill(h.begin()+ftSize,h.end(),double(0));

      // Create complex arrays
      const SizeType NN = fftSize / 2 + 1;
      std::vector<Complex> cSignal(NN);
      std::vector<Complex> cFilter(NN);

      // Fourier Transform
      Fourier fourier;
      fourier.dft(s,cSignal);
      fourier.dft(h,cFilter);

      // Computes convolution and normalizes
      const double factor = static_cast<double>(fftSize);
      for (SizeType nn = 0; nn < cSignal.size(); nn++)
      {
        cSignal[nn] *= cFilter[nn];
        cSignal[nn] /= factor;
      }

      // Transform to time domain
      fourier.dft(cSignal,s);

      // Sum into the end of the answer, on top of the overlap of the
      // segments. Exactly tailSize samples, so the answer cannot overrun.
      p = s.begin();
      q = answer.begin()+done;
      for (SizeType kk = 0; kk < tailSize; ++kk) *q++ += *p++;
    } // if (remaining > 0)
  } // else noSegm > 1

  return true;
} // Filter()



// Implementation of auxiliary functions
// __FFlop
//   Small value type that pairs a position (an index chosen by the user)
//   with a flop count. operator< compares the flop counts only, so
//   standard algorithms such as std::min_element select the entry with
//   the smallest flop count.
//
//   Copy construction and assignment are the compiler generated memberwise
//   versions. The accessors are const so they work on const objects too.
//
//   NOTE(review): names containing a double underscore are reserved for
//   the implementation; the name is kept so that existing users of the
//   class keep compiling.
class __FFlop {
public:
  // Position 0 with a flop count of 0
  __FFlop( ) : mPos(0),mFlp(0) {}
  // n: position, fp: flop count
  __FFlop( unsigned int n,unsigned int fp ) : mPos( n ),mFlp(fp) {}
  // Orders by flop count; the position is not taken into account
  bool operator<( const __FFlop& rhs ) const {return ( mFlp < rhs.mFlp );}
  // Flop count
  unsigned int val() const { return mFlp; }
  // Position
  unsigned int pos() const { return mPos; }
private:
  unsigned int mPos;
  unsigned int mFlp;
};

// No. of rows in the arrays fftwFlopsR2C and fftwFlopsC2R
const unsigned int szFFTWFlops = 9;

// Flops of fftw for computing the real to complex DFT of size 2^N
//   column 0    : the exponent N
//   column 1..3 : flop counts; presumably the add / multiply / fused
//                 multiply-add counts reported by fftw - TODO confirm
const unsigned int fftwFlopsR2C[szFFTWFlops][4] = {
  {10,10312,3272,1668},
  {11,22664,7224,4020},
  {12,49672,15896,9236},
  {13,107528,34520,21204},
  {14,231688,74584,47700},
  {15,503816,162904,101460},
  {16,1073416,347480,224340},
  {17,2277640,738648,492628},
  {18,4833544,1570136,1061972}
  //{19,10191112,3315032,2298964},
  //{20,21446920,6985048,4936788}
};

// Flops of fftw for computing the complex to real DFT of size 2^N
//   same layout as fftwFlopsR2C
const unsigned int fftwFlopsC2R[szFFTWFlops][4] = {
  {10,10308,3480,1708},
  {11,22692,7512,4076},
  {12,49480,16296,9428},
  {13,107652,35392,21164},
  {14,231688,76144,47700},
  {15,502308,167224,104076},
  {16,1070152,354920,228628},
  {17,2279432,754664,494228},
  {18,4837380,1599424,1062188}
  //{19,10199172,3370432,2296364},
  //{20,21446920,7084912,4936788}
};

// ComputeBlockSize
//   Decides into how many segments a signal is split for filtering and how
//   many signal samples one segment holds.
//
//   sgSize : number of samples of the signal
//   ftSize : number of samples of the filter
//   noSegm : (out) number of complete segments, always >= 1
//   llSize : (out) number of signal samples per segment
//
//   One segment is used if
//     - the filter is at least as long as the signal (llSize = ftSize),
//     - the answer (sgSize + ftSize - 1) is shorter than 2048 samples
//       (llSize = sgSize), or
//     - the filter is longer than every transform size of the flop tables
//       (llSize = sgSize).
//   Otherwise each table row gives a transform size nn and a segment size
//   ll = nn - ftSize + 1. The cost of a row is
//     (flops of both tables + ftSize - 1) * (sgSize / ll)
//   and the row with the smallest cost is taken; the first row wins a tie.
inline void
ComputeBlockSize(const unsigned int sgSize,const unsigned int ftSize,unsigned int& noSegm,unsigned int& llSize)
{
  // Remember that the answer has the length
  //   answer = signal + filter - 1

  // Answers shorter than this are not split into segments (2<<10 == 2048)
  const unsigned int lim = 2<<10;

  // if the filter is at least as long as the signal
  // consider only one segment
  if (ftSize >= sgSize)
  {
    noSegm = 1;
    llSize = ftSize;
    return;
  }

  // if the signal is short use only one segment;
  // the sum is built with 64 bits so that it cannot wrap around
  const unsigned long long anSize = static_cast<unsigned long long>(sgSize) + ftSize - 1;
  if (anSize < lim)
  {
    noSegm = 1;
    llSize = sgSize;
    return;
  }

  // Search the transform size with the smallest estimated cost.
  // The cost is kept in 64 bits: flops times number of segments exceeds
  // 32 bits for long signals and a wrapped value would win the comparison.
  bool               found    = false;
  unsigned int       bestSize = 0; // transform size of the cheapest row
  unsigned long long bestCost = 0;
  for (unsigned int kk = 0; kk < szFFTWFlops; kk++)
  {
    // NOTE(review): 2<<N is 2^(N+1), while the tables are documented as
    // the flops for size 2^N - confirm which one is intended. Kept as is
    // because it determines the segment sizes handed to the caller.
    const unsigned int nn = 2u << fftwFlopsR2C[kk][0];

    // do not consider the case of a transform shorter than the filter
    if (nn < ftSize)
      continue;

    // the segment size is ll and satisfies the condition
    //   ll + ftSize - 1 = nn
    const unsigned int ll = nn - ftSize + 1; // segment size, >= 1
    const unsigned int ns = sgSize / ll;     // no. of complete segments
    if (ns == 0)
      break; // this and all larger segments are longer than the signal

    unsigned long long cost = ftSize - 1;
    for (unsigned int ii = 1; ii < 4; ii++)
    {
      cost += fftwFlopsR2C[kk][ii];
      cost += fftwFlopsC2R[kk][ii];
    }
    cost *= ns;

    // strictly smaller: the first row keeps a tie
    if (!found || cost < bestCost)
    {
      found    = true;
      bestSize = nn;
      bestCost = cost;
    }
  }

  // No usable row (filter longer than the largest transform size):
  // there is nothing to choose from, fall back to a single segment
  if (!found)
  {
    noSegm = 1;
    llSize = sgSize;
    return;
  }

  // calculate the segment size
  llSize = bestSize - ftSize + 1;
  noSegm = (llSize >= sgSize) ? 1 : sgSize / llSize;

} // ComputeBlockSize




