/* FILE: fft.cc -*-Mode: c++-*- * * C++ code to do 1 and 2 dimensional FFT's. * */ #include #include "nb.h" #include "fft.h" #ifdef USE_MPI #include "mmsmpi.h" #endif /* USE_MPI */ /* End includes */ #undef USE_COMPLEX #ifndef OLD_CODE #define CMULT(xr,xi,yr,yi,zr,zi) (zr) = (xr)*(yr)-(xi)*(yi), \ (zi) = (xr)*(yi)+(xi)*(yr) #else extern inline void CMULT(const MY_COMPLEX_REAL_TYPE &xr, const MY_COMPLEX_REAL_TYPE &xi, const MY_COMPLEX_REAL_TYPE &yr, const MY_COMPLEX_REAL_TYPE &yi, MY_COMPLEX_REAL_TYPE &zr, MY_COMPLEX_REAL_TYPE &zi) { zr = xr*yr-xi*yi; zi = xr*yi+xi*yr; } #endif // OLD_CODE void FFT::ReleaseMemory(void) { if(vecsize>0) { if(Uforward!=(MyComplex *)NULL) delete[] Uforward; if(Uinverse!=(MyComplex *)NULL) delete[] Uinverse; if(permindex!=(int *)NULL) delete[] permindex; } vecsize=0; Uforward=Uinverse=(MyComplex *)NULL; permindex=(int *)NULL; } FFT::~FFT(void) { ReleaseMemory(); } void FFT::Setup(int size) { if(size==vecsize) return; if(size<1) PlainError(1,"Error in FFT::Setup(int): " "Requested length (%d) must be >0",size); int k; // Check that size is power of 2 for(k=size;k>2;k/=2) if(k%2!=0) PlainError(1,"Error in FFT::Setup(int): " "Requested length (%d) is not a power of 2",size); ReleaseMemory(); vecsize=size; // Allocate and setup MyComplex arrays if((Uforward=new MyComplex[size])==0) PlainError(1,"Error in FFT::Setup %s",ErrNoMem); if((Uinverse=new MyComplex[size])==0) PlainError(1,"Error in FFT::Setup %s",ErrNoMem); #ifdef ORIG_CODE double baseang= -2*PI/double(size); Uforward[0]=Uinverse[0]=MyComplex(1,0); double x,y; for(k=1;k1) { Uforward[size/2]=Uinverse[size/2]=MyComplex(-1,0); } #else double baseang = -2*PI/double(size); for(k=1;k1) { Uforward[size/2]=Uinverse[size/2]=MyComplex(-1,0); } if(size>3) { Uforward[size/4]=Uinverse[3*size/4]=MyComplex(0,-1); Uforward[3*size/4]=Uinverse[size/4]=MyComplex(0,1); } if(size>7) { double x=SQRT1_2; // 1/sqrt(2) double y=-x; Uforward[size/8]=Uinverse[7*size/8]=MyComplex(x,y); Uforward[3*size/8]=Uinverse[5*size/8]=MyComplex(-x,y); Uforward[5*size/8]=Uinverse[3*size/8]=MyComplex(-x,-y); Uforward[7*size/8]=Uinverse[size/8]=MyComplex(x,-y); } #endif // ORIG_CODE // Allocate and setup (bit-reversal) permutation index if((permindex=new int[size])==0) PlainError(1,"Error in FFT::Setup %s",ErrNoMem); permindex[0]=0; int m,n; // The following code relies heavily on size==2^log2vecsize for(k=1,n=size>>1;kk) permindex[k]=n; // Swap index else permindex[k]=0; // Do nothing: Index already swapped or the same // Calculate next n m=size>>1; while(m>0 && n&m) { n-=m; m>>=1; } n+=m; } } void FFT::ForwardDecFreq(int size,MyComplex *vec,FFT_REAL_TYPE divisor) { if(divisor==0) divisor=1.0; // Default is no normalization on forward FFT Setup(size); BaseDecFreqForward(vec); Permute(vec); if(divisor!=0. && divisor!=1.) { MY_COMPLEX_REAL_TYPE mult=1./divisor; for(int k=0;k4 for(blocksize=vecsize,blockcount=1;blocksize>4; blocksize/=4,blockcount*=4) { // Loop through double-step matrix multiplications halfbs=blocksize/2; threehalfbs=blocksize+halfbs; for(block=0,v=dvec;block2) { blockcount=vecsize/4; for(block=0,v=dvec;block4 for(blocksize=16,blockcount=vecsize/16;blocksize<=vecsize; blocksize*=4,blockcount/=4) { // Loop through double-step matric multiplications halfbs=blocksize/2; threehalfbs=blocksize+halfbs; for(block=0,v=dvec;block= 1. Also extract // base-2 log of sizes if(size1<1) PlainError(1,"Error in FFTReal2D::Setup(int): " "Requested size1 (%d) must be >=1",size1); if(size2<1) PlainError(1,"Error in FFTReal2D::Setup(int): " "Requested size2 (%d) must be >=1",size2); int k; if(size1==1) { logsize1=0; } else { for(k=size1,logsize1=1;k>2;k/=2,logsize1++) if(k%2!=0) PlainError(1,"Error in FFTReal2D::Setup(int): " "Requested size1 (%d) is not a power of 2",size1); } if(size2==1) { logsize2=0; } else { for(k=size2,logsize2=1;k>2;k/=2,logsize2++) if(k%2!=0) PlainError(1,"Error in FFTReal2D::Setup(int): " "Requested size2 (%d) is not a power of 2",size2); } // Allocate new space ReleaseMemory(); scratch=new MyComplex[OC_MAX(size1,size2)]; scratchb=new MyComplex[OC_MAX(size1,size2)]; vecsize1=size1; vecsize2=size2; } void FFTReal2D::SetupInverse(int size1,int size2) { if(size1==vecsize1 && size2==vecsize2 && workarr!=NULL) return; // Nothing to do Setup(size1,size2); if(workarr!=NULL) { delete[] workarr[0]; delete[] workarr; } // Safety int rowcount=(vecsize1/2)+1; workarr=new MyComplex*[rowcount]; workarr[0]=new MyComplex[rowcount*vecsize2]; for(int i=1;icsize1/2, with the second indices interpreted 'mod csize2'. int i,j; for(i=1;i=2", vecsize1,vecsize2); int i,j; FFT_REAL_TYPE x1,x2,y1,y2; FFT_REAL_TYPE xb1,xb2,yb1,yb2; // Do FFT on columns, 2 at a time for(j=0;j+3=2", vecsize1,vecsize2); int i,j; FFT_REAL_TYPE x1,x2,y1,y2; FFT_REAL_TYPE xb1,xb2,yb1,yb2; // Do row FFT's for(i=0;i+1=2", vecsize1,vecsize2); int i,j; FFT_REAL_TYPE x1,y1,x2,y2; FFT_REAL_TYPE xb1,yb1,xb2,yb2; // Do row inverse FFT's // Handle the first & csize1'th row specially. These rows are // the DFT's of real sequences, so they each satisfy the conjugate // symmetry condition workarr[0][0]=MyComplex(carr[0][0].real(),carr[csize1-1][0].real()); for(j=1;j=2", vecsize1,vecsize2); int i,j; FFT_REAL_TYPE x1,y1,x2,y2,xb1,yb1,xb2,yb2; // Column iFFT's // Handle the first & csize2/2'th column specially. These cols are // the DFT's of real sequences, so they each satisfy the conjugate // symmetry condition scratch[0]=MyComplex(carr[0][0].real(),carr[0][csize2/2].real()); for(i=1;i workarr[k/2]. for(i=0;i1. // This routine handles the special case where (at least) one of // the dimension is 1, which degenerates into a simple 1D FFT. if(csize1==1) { // Single row FFT int j; for(j=0;j1. // This routine handles the special case where (at least) one of // the dimension is 1, which degenerates into a simple 1D FFT. if(csize1==1) { // Single row iFFT int j; for(j=0;j= rsize2 (=%d)\n", csize2,rsize2); if(csize1<(rsize1/2)+1) PlainError(1,"Error in FFTRealD::Forward(int,int,double**,int,int," "MyComplex**): csize1 (=%d) *must* be >= (rsize1/2)+1" " (=%d)\n",csize1,(rsize1/2)+1); Setup(OC_MAX(1,2*(csize1-1)),csize2); if(vecsize1<1 || vecsize2<1) return; // Nothing to do // Check for 1D degenerate cases if(vecsize1==1 || vecsize2==1) { Forward1D(rsize1,rsize2,rarr,csize1,csize2,carr); } else { // Determine which Forward routine to call (ForwardCR or ForwardRC) // (Use double arithmetic to protect against integer overflow. If // the two times are very close, then the choice doesn't really // matter.) // 1) Estimated (proportional) time for ForwardCR REALWIDE crtime=REALWIDE(vecsize1*vecsize2)*REALWIDE(logsize2) + REALWIDE(vecsize1*rsize2)*REALWIDE(logsize1); // 2) Estimated (proportional) time for ForwardRC REALWIDE rctime=REALWIDE(rsize1*vecsize2)*REALWIDE(logsize2) + REALWIDE(vecsize1*vecsize2)*REALWIDE(logsize1); // Introduce empirical adjustment factor rctime*=CRRCspeedratio; if(crtime<=rctime) ForwardCR(rsize1,rsize2,rarr,csize1,csize2,carr); else ForwardRC(rsize1,rsize2,rarr,csize1,csize2,carr); } } void FFTReal2D::Inverse(int csize1,int csize2, const MyComplex* const* carr, int rsize1,int rsize2,double** rarr) { if(csize2=" " rsize2 (=%d)\n",csize2,rsize2); if(csize1<(rsize1/2)+1) PlainError(1,"Error in FFTRealD::Inverse(int,int,double**," "int,int,MyComplex**): csize1 (=%d) *must* be >=" " (rsize1/2)+1 (=%d)\n",csize1,(rsize1/2)+1); SetupInverse(OC_MAX(1,2*(csize1-1)),csize2); if(vecsize1<1 || vecsize2<1) return; // Nothing to do // Check for 1D degenerate cases if(vecsize1==1 || vecsize2==1) { Inverse1D(csize1,csize2,carr,rsize1,rsize2,rarr); } else { // Determine which Inverse routine to call (InverseRC or InverseCR) // (Use double arithmetic to protect against integer overflow. // If the two times are very close, then the choice doesn't really // matter.) // 1) Estimated (proportional) time for InverseRC (==ForwardCR) REALWIDE irctime=REALWIDE(vecsize1*vecsize2)*REALWIDE(logsize2) + REALWIDE(vecsize1*rsize2)*REALWIDE(logsize1); // 2) Estimated (proportional) time for InverseCR (==ForwardRC) REALWIDE icrtime=REALWIDE(rsize1*vecsize2)*REALWIDE(logsize2) + REALWIDE(vecsize1*vecsize2)*REALWIDE(logsize1); // Introduce empirical adjustment factor icrtime*=CRRCspeedratio; if(irctime<=icrtime) InverseRC(csize1,csize2,carr,rsize1,rsize2,rarr); else InverseCR(csize1,csize2,carr,rsize1,rsize2,rarr); } } #ifdef USE_MPI static FFT fft1_mpi,fft2_mpi; static int vecsize1_mpi(0),vecsize2_mpi(0); static MyComplex* scratch_mpi(NULL); static MyComplex** workarr_mpi(NULL); FFTReal2D_mpi::FFTReal2D_mpi() { } void SetupMemory_mpi(int size1,int size2) { if(size1!=vecsize1_mpi) { if(scratch_mpi!=NULL) delete[] scratch_mpi; vecsize1_mpi=size1; scratch_mpi=new MyComplex[vecsize1_mpi]; } if(size2!=vecsize2_mpi) { if(workarr_mpi!=NULL) { delete[] workarr_mpi[0]; delete[] workarr_mpi; } vecsize2_mpi=size2; int rowcount=(vecsize1_mpi/2)+1; workarr_mpi=new MyComplex*[rowcount]; workarr_mpi[0]=new MyComplex[rowcount*vecsize2_mpi]; for(int i=1;i=csize1, with the second indices interpreted 'mod csize2'. // The dimensions csize2 and 2*(csize1-1) *must* be powers of 2, // but rsize1 and rsize2 do not. On import, the rarr will be // implicitly zero-padded as necessary to fit into the specified // output array carr. To get a non-periodic transform, set // // 2*(csize1-1) to 2*(first power of 2 >= rsize1), and // csize2 to 2*(first power of 2 >= rsize2). int i,j; FFT_REAL_TYPE x1,y1,x2,y2; int vecsize1=OC_MAX(1,2*(csize1-1)); int vecsize2=csize2; for(i=vecsize1;i>2;i/=2) if(i%2!=0) PlainError(1,"Error in FFTReal2D_mpi::Forward(int): " "Requested csize1 - 1 (%d - 1) is not a power of 2", csize1); for(j=vecsize2;j>2;j/=2) if(j%2!=0) PlainError(1,"Error in FFTReal2D_mpi::Forward(int): " "Requested csize2 (%d) is not a power of 2", csize2); if(vecsize1==0 || vecsize2==0) return; // Nothing to do SetupMemory_mpi_master_b(vecsize1,vecsize2); // Copy input data into packed complex array for(i=0;i2;i/=2) if(i%2!=0) PlainError(1,"Error in FFTReal2D_mpi::Inverse(int): " "Requested csize1 - 1 (%d - 1) is not a power of 2", csize1); for(j=vecsize2;j>2;j/=2) if(j%2!=0) PlainError(1,"Error in FFTReal2D_mpi::Inverse(int): " "Requested csize2 (%d) is not a power of 2", csize2); if(vecsize1==0 || vecsize2==0) return; // Nothing to do SetupMemory_mpi(vecsize1,vecsize2); // Do row inverse FFT's // Handle the first & csize1'th row specially. These rows are // the DFT's of real sequences, so they each satisfy the conjugate // symmetry condition workarr_mpi[0][0]=MyComplex(carr[0][0].real(),carr[csize1-1][0].real()); for(j=1;j