fft.txt

Modifed FFT routine file - Anil Prabhakar, 07/22/2013 02:55 PM

Download (47.1 KB)

 
1
/* FILE: fft.cc             -*-Mode: c++-*-
2
 *
3
 * C++ code to do 1 and 2 dimensional FFT's.
4
 *
5
 */
6

    
7
#include <string.h>
8

    
9
#include "nb.h"
10
#include "fft.h"
11

    
12
#ifdef USE_MPI
13
#include "mmsmpi.h"
14
#endif /* USE_MPI */
15

    
16
/* End includes */
17

    
18
#undef USE_COMPLEX
19

    
20
#ifndef OLD_CODE
21
#define CMULT(xr,xi,yr,yi,zr,zi) (zr) = (xr)*(yr)-(xi)*(yi), \
22
                                 (zi) = (xr)*(yi)+(xi)*(yr)
23
#else
24
extern inline void CMULT(const MY_COMPLEX_REAL_TYPE &xr,
25
			 const MY_COMPLEX_REAL_TYPE &xi,
26
			 const MY_COMPLEX_REAL_TYPE &yr,
27
			 const MY_COMPLEX_REAL_TYPE &yi,
28
			 MY_COMPLEX_REAL_TYPE &zr,
29
			 MY_COMPLEX_REAL_TYPE &zi)
30
{
31
  zr = xr*yr-xi*yi;
32
  zi = xr*yi+xi*yr;
33
}
34
#endif // OLD_CODE
35

    
36
void FFT::ReleaseMemory(void)
37
{
38
  if(vecsize>0) {
39
    if(Uforward!=(MyComplex *)NULL)  delete[] Uforward;
40
    if(Uinverse!=(MyComplex *)NULL)  delete[] Uinverse;
41
    if(permindex!=(int *)NULL)     delete[] permindex;
42
  }
43
  vecsize=0;
44
  Uforward=Uinverse=(MyComplex *)NULL;
45
  permindex=(int *)NULL;
46
}
47

    
48
FFT::~FFT(void)
49
{
50
  ReleaseMemory();
51
}
52

    
53
void FFT::Setup(int size)
54
{
55
  if(size==vecsize) return;
56
  if(size<1)  PlainError(1,"Error in FFT::Setup(int): "
57
		       "Requested length (%d) must be >0",size);
58
  int k;
59
  // Check that size is power of 2
60
  for(k=size;k>2;k/=2)
61
    if(k%2!=0) PlainError(1,"Error in FFT::Setup(int): "
62
			"Requested length (%d) is not a power of 2",size);
63
  ReleaseMemory();
64
  vecsize=size;
65

    
66
  // Allocate and setup MyComplex arrays
67
  if((Uforward=new MyComplex[size])==0)
68
    PlainError(1,"Error in FFT::Setup %s",ErrNoMem);
69
  if((Uinverse=new MyComplex[size])==0)
70
    PlainError(1,"Error in FFT::Setup %s",ErrNoMem);
71
#ifdef ORIG_CODE
72
  double baseang= -2*PI/double(size);
73
  Uforward[0]=Uinverse[0]=MyComplex(1,0);
74
  double x,y;
75
  for(k=1;k<size/2;k++) {
76
    x = cos(baseang*k);
77
    y = -sqrt(1-x*x);
78
    y += (1-(x*x+y*y))/(2*y);  // Tiny error correction
79
    x += (1-(x*x+y*y))/(2*x);
80
    Uforward[k]=Uinverse[size-k]=MyComplex(x,y);
81
    Uforward[size-k]=Uinverse[k]=MyComplex(x,-y);
82
  }
83
  if(size>1) {
84
    Uforward[size/2]=Uinverse[size/2]=MyComplex(-1,0);
85
  }
86
#else
87
  double baseang = -2*PI/double(size);
88
  for(k=1;k<size/8;k++) {
89
    double angle=k*baseang;
90
    double y=sin(angle);
91
    double x=cos(angle);
92

    
93

    
94
    Uforward[k]=Uinverse[size-k]=MyComplex(x,y);
95
    Uforward[size/2-k]=Uinverse[size/2+k]=MyComplex(-x,y);
96
    Uforward[size/2+k]=Uinverse[size/2-k]=MyComplex(-x,-y);
97
    Uforward[size-k]=Uinverse[k]=MyComplex(x,-y);
98

    
99
    Uforward[size/4-k]=Uinverse[3*size/4+k]=MyComplex(-y,-x);
100
    Uforward[size/4+k]=Uinverse[3*size/4-k]=MyComplex(y,-x);
101
    Uforward[3*size/4-k]=Uinverse[size/4+k]=MyComplex(y,x);
102
    Uforward[3*size/4+k]=Uinverse[size/4-k]=MyComplex(-y,x);
103
  }
104
  Uforward[0]=Uinverse[0]=MyComplex(1,0);
105
  if(size>1) {
106
    Uforward[size/2]=Uinverse[size/2]=MyComplex(-1,0);
107
  }
108
  if(size>3) {
109
    Uforward[size/4]=Uinverse[3*size/4]=MyComplex(0,-1);
110
    Uforward[3*size/4]=Uinverse[size/4]=MyComplex(0,1);
111
  }
112
  if(size>7) {
113
    double x=SQRT1_2;  // 1/sqrt(2)
114
    double y=-x;
115
    Uforward[size/8]=Uinverse[7*size/8]=MyComplex(x,y);
116
    Uforward[3*size/8]=Uinverse[5*size/8]=MyComplex(-x,y);
117
    Uforward[5*size/8]=Uinverse[3*size/8]=MyComplex(-x,-y);
118
    Uforward[7*size/8]=Uinverse[size/8]=MyComplex(x,-y);
119
  }
120
#endif // ORIG_CODE
121

    
122
  // Allocate and setup (bit-reversal) permutation index
123
  if((permindex=new int[size])==0)
124
    PlainError(1,"Error in FFT::Setup %s",ErrNoMem);
125
  permindex[0]=0;
126
  int m,n;    // The following code relies heavily on size==2^log2vecsize
127
  for(k=1,n=size>>1;k<size;k++) {
128
    // At each step, n is bit-reversed pattern of k
129
    if(n>k) permindex[k]=n;  // Swap index
130
    else permindex[k]=0;     // Do nothing: Index already swapped or the same
131
    // Calculate next n
132
    m=size>>1;
133
    while(m>0 && n&m) { n-=m; m>>=1; }
134
    n+=m;
135
  }
136
}
137

    
138
void FFT::ForwardDecFreq(int size,MyComplex *vec,FFT_REAL_TYPE divisor)
139
{
140
  if(divisor==0) divisor=1.0;  // Default is no normalization on forward FFT
141
  Setup(size);
142
  BaseDecFreqForward(vec);
143
  Permute(vec);
144
  if(divisor!=0. && divisor!=1.) {
145
    MY_COMPLEX_REAL_TYPE mult=1./divisor;
146
    for(int k=0;k<size;k++) vec[k]*=mult;
147
  }
148
}
149

    
150
void FFT::InverseDecTime(int size,MyComplex *vec,FFT_REAL_TYPE divisor)
151
{
152
  if(divisor==0) divisor=(FFT_REAL_TYPE)size;
153
  /// Default divisor on iFFT is 'size'
154
  Setup(size);
155
  Permute(vec);
156
  BaseDecTimeInverse(vec);
157
  if(divisor!=0. && divisor!=1.) {
158
    MY_COMPLEX_REAL_TYPE mult=1./divisor;
159
    for(int k=0;k<size;k++) vec[k]*=mult;
160
  }
161

    
162
}
163

    
164
inline void Swap(MyComplex &a,MyComplex &b)
165
{ MyComplex c(a); a=b; b=c; }
166

    
167
void FFT::Permute(MyComplex *vec)
168
{ /* Bit reversal permutation */
169
  int i,j;
170
  for(i=0;i<vecsize;i++) {
171
    if((j=permindex[i])!=0) Swap(vec[i],vec[j]);
172
  }
173
}
174

    
175
void FFT::BaseDecFreqForward(MyComplex *vec)
176
{ // In-place forward FFT using Decimation in Frequency technique,
177
  // *WITHOUT* resuffling of indices.
178
  // NOTE 1: This code does not use the Complex class, because
179
  //         some compilers do not effectively optimize around
180
  //         Complex operations such as multiplication.  So this
181
  //         routine just makes use of ordinary type "FFT_REAL_TYPE"
182
  //         variables, and assumes each Complex variable is
183
  //         actually two consecutive "MY_COMPLEX_REAL_TYPE" variables.
184
  // NOTE 2: See notes in MJD's micromagnetics notebook, 11-Sep-96
185
  //         (p. 62) and 29-Sep-96 (p. 69).
186
  // NOTE 3: This code has been optimized for performance on cascade.cam,
187
  //         a PentiumPro 200 MHz machine using a stock gcc 2.7.2
188
  //         compiler.  In particular, the x86 chips suffer from a
189
  //         shortage of registers.
190
  // NOTE 4: Some compromise made to RISC architectures on 27-May-1997,
191
  //         by moving all loads before any stores in main loop.  As
192
  //         done, it hurts performance on PentiumPro-200 only a couple
193
  //         of percent. (mjd)
194

    
195
  #define datat float                  /*Added by Guru on 11/10/2010: created data type datat; specified datat as required*/
196
  typedef datat FFT_REAL_TYPE;         /*Added by Guru on 11/10/2010: converted FFT_REAL_TYPE to datat from double*/ 
197

    
198

    
199
  if(vecsize==1) return; // Nothing to do
200

    
201
  MY_COMPLEX_REAL_TYPE *v;
202

    
203
  MY_COMPLEX_REAL_TYPE const *const U=(MY_COMPLEX_REAL_TYPE *)Uforward;
204
  MY_COMPLEX_REAL_TYPE *const dvec=(MY_COMPLEX_REAL_TYPE *)vec;
205

    
206
  int block,blocksize,blockcount,offset,uoff1;
207
  int halfbs,threehalfbs; // Half blocksize,3/2 blocksize
208
  FFT_REAL_TYPE m1x,m1y,m2x,m2y,m3x,m3y;
209
  FFT_REAL_TYPE x0,y0,x1,y1,x2,y2,x3,y3;
210
  FFT_REAL_TYPE xs02,ys02,xd02,yd02,xs13,ys13,xd13,yd13;
211
  FFT_REAL_TYPE t1x,t1y,t2x,t2y,t3x,t3y;
212

    
213

    
214
  // Blocksize>4
215
  for(blocksize=vecsize,blockcount=1;blocksize>4;
216
      blocksize/=4,blockcount*=4) {
217
    // Loop through double-step matrix multiplications
218
    halfbs=blocksize/2; threehalfbs=blocksize+halfbs;
219
    for(block=0,v=dvec;block<blockcount;block++,v+=2*blocksize) {
220
      for(offset=0;offset<halfbs;offset+=2) {
221
	uoff1=offset*blockcount;
222
	m1x=U[uoff1];	m1y=U[uoff1+1];
223
	m2x=U[2*uoff1];	m2y=U[2*uoff1+1];
224
	m3x=U[3*uoff1];	m3y=U[3*uoff1+1];
225

    
226
	x0=v[offset];
227
        y0=v[offset+1];
228
	x1=v[halfbs+offset];
229
	y1=v[halfbs+offset+1];
230
	x2=v[blocksize+offset];
231
	y2=v[blocksize+offset+1];
232
	x3=v[threehalfbs+offset];
233
	y3=v[threehalfbs+offset+1];
234

    
235
	xs02=x0+x2;	xs13=x1+x3;
236
	v[offset]=xs02+xs13;
237
	t1x=xs02-xs13;
238

    
239
	ys02=y0+y2;	ys13=y1+y3;
240
        v[offset+1]=ys02+ys13;
241
	t1y=ys02-ys13;
242

    
243
	v[halfbs+offset]    =t1x*m2x-t1y*m2y;
244
	v[halfbs+offset+1]  =t1y*m2x+t1x*m2y;
245

    
246
	xd02=x0-x2;	yd13=y1-y3;
247
	t3x=xd02-yd13;  t2x=xd02+yd13;
248
        yd02=y0-y2;	xd13=x1-x3;
249
	t3y=yd02+xd13;  t2y=yd02-xd13;
250
	v[blocksize+offset]  =t2x*m1x-t2y*m1y;
251
	v[blocksize+offset+1]=t2y*m1x+t2x*m1y;
252
	v[threehalfbs+offset]  =t3x*m3x-t3y*m3y;
253
	v[threehalfbs+offset+1]=t3y*m3x+t3x*m3y;
254
      }
255
    }
256
  }
257

    
258
  // Do smallest blocks; size is either 4 or 2
259
  if(blocksize==4) {
260
    blockcount=vecsize/4;
261
    for(block=0,v=dvec;block<blockcount;block++,v+=8) {
262
      x0=v[0];      y0=v[1];      x1=v[2];      y1=v[3];
263
      x2=v[4];      y2=v[5];      x3=v[6];      y3=v[7];
264

    
265
      xs02=x0+x2;
266
      xs13=x1+x3; 
267
      v[0]=xs02+xs13;
268
      v[2]=xs02-xs13;
269

    
270
      ys02=y0+y2;
271
      ys13=y1+y3;
272
      v[1]=ys02+ys13;
273
      v[3]=ys02-ys13;
274

    
275
      xd02=x0-x2;
276
      yd13=y1-y3;
277
      v[4]=xd02+yd13;
278
      v[6]=xd02-yd13;
279

    
280
      yd02=y0-y2;
281
      xd13=x1-x3;
282
      v[5]=yd02-xd13;
283
      v[7]=yd02+xd13;
284
    }
285
  }
286
  else { // blocksize==2
287
    blockcount=vecsize/2;
288
    for(block=0,v=dvec;block<blockcount;block++,v+=4) {
289
      x0=v[0];      y0=v[1];
290
      x1=v[2];      y1=v[3];
291
      v[0]=x0+x1;   v[2]=x0-x1;
292
      v[1]=y0+y1;   v[3]=y0-y1;
293
    }
294
  }
295
}
296

    
297
void FFT::BaseDecTimeInverse(MyComplex *vec)
298
{ // In-place inverse FFT using Decimation in Time technique,
299
  // *WITHOUT* resuffling of indices.
300
  // NOTE 1: This code does not use the Complex class, because
301
  //         some compilers do not effectively optimize around
302
  //         Complex operations such as multiplication.  So this
303
  //         routine just makes use of ordinary type "FFT_REAL_TYPE"
304
  //         variables, and assumes each Complex variable is
305
  //         actually two consecutive "MY_COMPLEX_REAL_TYPE" variables.
306
  // NOTE 2: See notes in MJD's micromagnetics notebook, 11-Sep-96
307
  //         (p. 62) and 29-Sep-96 (p. 69).
308
  // NOTE 3: This code has been optimized for performance on cascade.cam,
309
  //         a PentiumPro 200 MHz machine using a stock gcc 2.7.2
310
  //         compiler.  In particular, the x86 chips suffer from a
311
  //         shortage of registers.
312
  // NOTE 4: Some compromise made to RISC architectures on 27-May-1997,
313
  //         by moving all loads before any stores in main loop.  As
314
  //         done, it hurts performance on PentiumPro-200 only a couple
315
  //         of percent. (mjd)
316

    
317
  #define datat float                  /*Added by Guru on 11/10/2010: created data type datat; specified datat as required*/
318
  typedef datat FFT_REAL_TYPE;         /*Added by Guru on 11/10/2010: converted FFT_REAL_TYPE to datat from double*/
319

    
320
  (vecsize==1) return; // Nothing to do
321

    
322
  MY_COMPLEX_REAL_TYPE *v;
323

    
324
  MY_COMPLEX_REAL_TYPE const *const U=(MY_COMPLEX_REAL_TYPE *)Uinverse;
325
  MY_COMPLEX_REAL_TYPE *const dvec=(MY_COMPLEX_REAL_TYPE *)vec;
326

    
327
  int block,blocksize,blockcount,offset,uoff1;
328
  int halfbs,threehalfbs; // Half blocksize,3/2 blocksize
329
  FFT_REAL_TYPE m1x,m1y,m2x,m2y,m3x,m3y;
330
  FFT_REAL_TYPE x0,y0,x1,y1,x2,y2,x3,y3;
331
  FFT_REAL_TYPE xs01,ys01,xd01,yd01,xs23,ys23,xd23,yd23;
332
  FFT_REAL_TYPE t1x,t1y,t2x,t2y,t3x,t3y;
333

    
334
  // Do smallest blocks; size is either 4 or 2
335
  if(vecsize>2) {
336
    blockcount=vecsize/4;
337
    for(block=0,v=dvec;block<blockcount;block++,v+=8) {
338
      x0=v[0];      y0=v[1];
339
      x1=v[2];      y1=v[3];
340
      x2=v[4];      y2=v[5];
341
      x3=v[6];      y3=v[7];
342
      xs01=x0+x1;      xs23=x2+x3;   // See NOTE 3 above
343
      v[0]=xs01+xs23;      v[4]=xs01-xs23;
344
      ys01=y0+y1;      ys23=y2+y3;
345
      v[1]=ys01+ys23;      v[5]=ys01-ys23;
346
      xd01=x0-x1;      yd23=y2-y3;
347
      v[2]=xd01-yd23;      v[6]=xd01+yd23;
348
      yd01=y0-y1;      xd23=x2-x3;
349
      v[3]=yd01+xd23;      v[7]=yd01-xd23;
350
    }
351
  }
352
  else { // vecsize==2
353
    x0=dvec[0];    y0=dvec[1];
354
    x1=dvec[2];    y1=dvec[3];
355
    dvec[0]=x0+x1;
356
    dvec[2]=x0-x1;
357
    dvec[1]=y0+y1;
358
    dvec[3]=y0-y1;
359
    return;
360
  }
361

    
362
  // Blocksize>4
363
  for(blocksize=16,blockcount=vecsize/16;blocksize<=vecsize;
364
      blocksize*=4,blockcount/=4) {
365
    // Loop through double-step matric multiplications
366
    halfbs=blocksize/2; threehalfbs=blocksize+halfbs;
367
    for(block=0,v=dvec;block<blockcount;block++,v+=2*blocksize) {
368
      for(offset=0;offset<blocksize/2;offset+=2) {
369
	x0=v[offset];
370
	y0=v[offset+1];
371
	t2x=v[blocksize+offset];
372
	t2y=v[blocksize+offset+1];
373
	uoff1=offset*blockcount;
374
	m1x=U[uoff1];  m1y=U[uoff1+1];
375
	x2=t2x*m1x-t2y*m1y;
376
	y2=t2y*m1x+t2x*m1y;
377

    
378
	m2x=U[2*uoff1];  m2y=U[2*uoff1+1];
379
	t1x=v[halfbs+offset];
380
	t1y=v[halfbs+offset+1];
381
	x1=t1x*m2x-t1y*m2y;
382
	y1=t1y*m2x+t1x*m2y;
383

    
384
	t3x=v[threehalfbs+offset];
385
	t3y=v[threehalfbs+offset+1];
386
	m3x=U[3*uoff1];  m3y=U[3*uoff1+1];
387
	x3=t3x*m3x-t3y*m3y;
388
	y3=t3y*m3x+t3x*m3y;
389

    
390
	xs01=x0+x1;	xs23=x2+x3;
391
	v[            offset  ] = xs01+xs23;
392
	v[  blocksize+offset  ] = xs01-xs23;
393

    
394
	ys01=y0+y1;     ys23=y2+y3;
395
        v[            offset+1] = ys01+ys23;
396
	v[  blocksize+offset+1] = ys01-ys23;
397

    
398
	yd01=y0-y1;	xd23=x2-x3;
399
	v[  halfbs   +offset+1] = yd01+xd23;
400
	v[threehalfbs+offset+1] = yd01-xd23;
401

    
402
	xd01=x0-x1;	yd23=y2-y3;
403
	v[  halfbs   +offset  ] = xd01-yd23;
404
	v[threehalfbs+offset  ] = xd01+yd23;
405
      }
406
    }
407
  }
408

    
409
  if(blocksize==2*vecsize) {
410
    // We still have to do one single-step matrix multiplication
411
    blocksize=vecsize;  v=dvec;
412
    for(offset=0;offset<blocksize;offset+=2,v+=2) {
413
      x0=v[0];              y0=v[1];
414
      t1x=v[vecsize];       t1y=v[vecsize+1];
415
      m1x=U[offset];        m1y=U[offset+1];
416
      x1=t1x*m1x-t1y*m1y;   y1=t1y*m1x+t1x*m1y;
417
      v[0]         = x0+x1; 
418
      v[vecsize]   = x0-x1;
419
      v[1]         = y0+y1;
420
      v[vecsize+1] = y0-y1;
421
    }
422
  }
423

    
424
}
425

    
426
const REALWIDE FFTReal2D::CRRCspeedratio=1.10;
427
/// Relative speed of ForwardCR as compared to ForwardRC.
428
/// If bigger than 1, then ForwardCR is faster.  This will
429
/// be machine & compiler dependent...oh well.
430

    
431
void FFTReal2D::Setup(int size1,int size2)
432
{ // Note: This routine is also called by FFTReal2D::SetupInverse()
433
  if(size1==vecsize1 && size2==vecsize2) return;  // Nothing to do
434

    
435
  // Check that sizes are powers of 2, and >= 1.  Also extract
436
  // base-2 log of sizes
437
  if(size1<1) PlainError(1,"Error in FFTReal2D::Setup(int): "
438
			 "Requested size1 (%d) must be >=1",size1);
439
  if(size2<1) PlainError(1,"Error in FFTReal2D::Setup(int): "
440
			 "Requested size2 (%d) must be >=1",size2);
441

    
442
  int k;
443
  if(size1==1) {
444
    logsize1=0;
445
  } else {
446
    for(k=size1,logsize1=1;k>2;k/=2,logsize1++)
447
      if(k%2!=0) PlainError(1,"Error in FFTReal2D::Setup(int): "
448
			    "Requested size1 (%d) is not a power of 2",size1);
449
  }
450
  if(size2==1) {
451
    logsize2=0;
452
  } else {
453
    for(k=size2,logsize2=1;k>2;k/=2,logsize2++)
454
      if(k%2!=0) PlainError(1,"Error in FFTReal2D::Setup(int): "
455
			    "Requested size2 (%d) is not a power of 2",size2);
456
  }
457

    
458
  // Allocate new space
459
  ReleaseMemory();
460
  scratch=new MyComplex[OC_MAX(size1,size2)];
461
  scratchb=new MyComplex[OC_MAX(size1,size2)];
462
  vecsize1=size1; vecsize2=size2;
463
}
464

    
465
void FFTReal2D::SetupInverse(int size1,int size2)
466
{
467
  if(size1==vecsize1 && size2==vecsize2 && workarr!=NULL)
468
    return;  // Nothing to do
469
  Setup(size1,size2);
470
  if(workarr!=NULL) { delete[] workarr[0]; delete[] workarr; } // Safety
471
  int rowcount=(vecsize1/2)+1;
472
  workarr=new MyComplex*[rowcount];
473
  workarr[0]=new MyComplex[rowcount*vecsize2];
474
  for(int i=1;i<rowcount;i++) workarr[i]=workarr[i-1]+vecsize2;
475
}
476

    
477
void FFTReal2D::ReleaseMemory()
478
{
479
  if(vecsize1==0 || vecsize2==0) return;
480
  delete[] scratch;   scratch=NULL;  
481
  delete[] scratchb;  scratchb=NULL;  
482
  if(workarr!=NULL) {
483
    delete[] workarr[0];
484
    delete[] workarr;
485
    workarr=NULL;
486
  }
487
  vecsize1=0; vecsize2=0;
488
  fft1.ReleaseMemory(); fft2.ReleaseMemory();
489
}
490

    
491
void FFTReal2D::FillOut(int csize1,int csize2,MyComplex** carr)
492
{ // This routine assumes carr is a top-half-filled DFT of
493
  // a real function, and fills in the bottom half using the
494
  // relation
495
  //      carr[csize1-i][csize2-j]=conj(carr[i][j])
496
  // for i>csize1/2, with the second indices interpreted 'mod csize2'.
497
  int i,j;
498
  for(i=1;i<csize1/2;i++) {
499
    carr[csize1-i][0]=conj(carr[i][0]);
500
    for(j=1;j<csize2;j++) {
501
      carr[csize1-i][j]=conj(carr[i][csize2-j]);
502
    }
503
  }
504

    
505
}
506

    
507
void FFTReal2D::ForwardCR(int rsize1,int rsize2,
508
			  const double* const* rarr,
509
			  int csize1,int csize2,MyComplex** carr)
510
{
511
  Setup(OC_MAX(1,2*(csize1-1)),csize2); // Safety
512
  if(vecsize1<2 || vecsize2<2) 
513
    PlainError(1,"Error in FFTReal2D::ForwardCR(...): "
514
	       "Full array dimensions (%dx%d) must be both >=2",
515
	       vecsize1,vecsize2);
516

    
517
  int i,j;
518
  FFT_REAL_TYPE x1,x2,y1,y2;
519
  FFT_REAL_TYPE xb1,xb2,yb1,yb2;
520

    
521
  // Do FFT on columns, 2 at a time
522
  for(j=0;j+3<rsize2;j+=4) {
523
    // Pack into MyComplex scratch array
524
    for(i=0;i<rsize1;i++) {
525
      scratch[i]=MyComplex(rarr[i][j],rarr[i][j+1]);
526
      scratchb[i]=MyComplex(rarr[i][j+2],rarr[i][j+3]);
527
    }
528
    // Zero pad scratch space
529
    for(i=rsize1;i<vecsize1;i++) scratch[i]=MyComplex(0.,0.);
530
    for(i=rsize1;i<vecsize1;i++) scratchb[i]=MyComplex(0.,0.);
531
    // Do complex FFT
532
    fft1.ForwardDecFreq(vecsize1,scratchb);
533
    fft1.ForwardDecFreq(vecsize1,scratch);
534
    // Unpack into top half of 2D complex array
535
    // Rows 0 & vecsize1/2 are real-valued, so pack them together
536
    // into row 0 (row 0 as real part, row vecsize1/2 as imag. part).
537
    carr[0][j]  =MyComplex(scratch[0].real(),scratch[vecsize1/2].real());
538
    carr[0][j+1]=MyComplex(scratch[0].imag(),scratch[vecsize1/2].imag());
539
    carr[0][j+2]=MyComplex(scratchb[0].real(),scratchb[vecsize1/2].real());
540
    carr[0][j+3]=MyComplex(scratchb[0].imag(),scratchb[vecsize1/2].imag());
541
    for(i=1;i<vecsize1/2;i++) { // ASSUMES vecsize1 is even!
542
      x1=scratch[i].real()/2;            y1=scratch[i].imag()/2;
543
      x2=scratch[vecsize1-i].real()/2;   y2=scratch[vecsize1-i].imag()/2;
544
      xb1=scratchb[i].real()/2;          yb1=scratchb[i].imag()/2;
545
      xb2=scratchb[vecsize1-i].real()/2; yb2=scratchb[vecsize1-i].imag()/2;
546
      carr[i][j]   =MyComplex(x1+x2,y1-y2);
547
      carr[i][j+1] =MyComplex(y1+y2,x2-x1);
548
      carr[i][j+2] =MyComplex(xb1+xb2,yb1-yb2);
549
      carr[i][j+3] =MyComplex(yb1+yb2,xb2-xb1);
550
    }
551
  }
552
  // Case rsize2 not divisible by 4
553
  for(;j<rsize2;j+=2) {
554
    // Pack into complex scratch array
555
    if(j+1<rsize2) {
556
      for(i=0;i<rsize1;i++) scratch[i]=MyComplex(rarr[i][j],rarr[i][j+1]);
557
    }
558
    else { // rsize2 == 1 mod 2.
559
      for(i=0;i<rsize1;i++) scratch[i]=MyComplex(rarr[i][j],0.);
560
    }
561
    for(i=rsize1;i<vecsize1;i++) scratch[i]=MyComplex(0.,0.);
562
    fft1.ForwardDecFreq(vecsize1,scratch);
563
    carr[0][j]  =MyComplex(scratch[0].real(),scratch[vecsize1/2].real());
564
    carr[0][j+1]=MyComplex(scratch[0].imag(),scratch[vecsize1/2].imag());
565
    for(i=1;i<vecsize1/2;i++) { // ASSUMES vecsize1 is even!
566
      x1=scratch[i].real()/2;          y1=scratch[i].imag()/2;
567
      x2=scratch[vecsize1-i].real()/2; y2=scratch[vecsize1-i].imag()/2;
568
      carr[i][j]   =MyComplex(x1+x2,y1-y2);
569
      carr[i][j+1] =MyComplex(y1+y2,x2-x1);
570
    }
571
  }
572
  // Zero-pad remaining columns
573
  if(rsize2<csize2) {
574
    for(i=0;i<csize1;i++) for(j=rsize2;j<csize2;j++)
575
      carr[i][j]=MyComplex(0.,0.);
576
    // Note: One _may_ be able to gain a few percent speedup
577
    //       by using the 'memcpy' C-library routine.
578
  }
579

    
580
  // Do FFT on top half of rows
581
  for(i=0;i<csize1-1;i++) fft2.ForwardDecFreq(csize2,carr[i]);
582

    
583
  // Pull out row 0 & row csize1-1 from (packed) row 0
584
  carr[csize1-1][0] = MyComplex(carr[0][0].imag(),0.);
585
  carr[0][0]        = MyComplex(carr[0][0].real(),0.);
586
  for(j=1;j<csize2/2;j++) {
587
      x1=carr[0][j].real()/2;        y1=carr[0][j].imag()/2;
588
      x2=carr[0][csize2-j].real()/2; y2=carr[0][csize2-j].imag()/2;
589
      MyComplex temp1(x1+x2,y1-y2);
590
      MyComplex temp2(y1+y2,x2-x1);
591
      carr[0][j]                = temp1;
592
      carr[0][csize2-j]         = conj(temp1);
593
      carr[csize1-1][j]         = temp2;
594
      carr[csize1-1][csize2-j]  = conj(temp2);
595
  }
596
  carr[csize1-1][csize2/2] = MyComplex(carr[0][csize2/2].imag(),0.);
597
  carr[0][csize2/2]        = MyComplex(carr[0][csize2/2].real(),0.);
598
}
599

    
600
void FFTReal2D::ForwardRC(int rsize1,int rsize2,
601
			  const double* const* rarr,
602
			  int csize1,int csize2,MyComplex** carr)
603
{
604
  Setup(OC_MAX(1,2*(csize1-1)),csize2); // Safety
605
  if(vecsize1<2 || vecsize2<2) 
606
    PlainError(1,"Error in FFTReal2D::ForwardRC(...): "
607
	       "Full array dimensions (%dx%d) must be both >=2",
608
	       vecsize1,vecsize2);
609

    
610
  int i,j;
611
  FFT_REAL_TYPE x1,x2,y1,y2;
612
  FFT_REAL_TYPE xb1,xb2,yb1,yb2;
613

    
614
  // Do row FFT's
615
  for(i=0;i+1<rsize1;i+=2) {
616
    // Pack 'MyComplex' row
617
    for(j=0;j<rsize2;j++)
618
      carr[i/2][j]=MyComplex(rarr[i][j],rarr[i+1][j]);
619
    for(j=rsize2;j<csize2;j++) carr[i/2][j]=MyComplex(0.,0.); // Zero pad
620
    // Do FFT
621
    fft2.ForwardDecFreq(csize2,carr[i/2]);
622
  }
623
  for(;i<rsize1;i+=2) { // In case rsize1 == 1 mod 2
624
    for(j=0;j<rsize2;j++)
625
      carr[i/2][j]=MyComplex(rarr[i][j],0.);
626
    for(j=rsize2;j<csize2;j++) carr[i/2][j]=MyComplex(0.,0.); // Zero pad
627
    // Do FFT
628
    fft2.ForwardDecFreq(csize2,carr[i/2]);
629
  }
630
  // Any remaining rows are zero padding on the fly during
631
  // the column FFT's (see below).
632

    
633
  // Do column FFT's
634
  // Do column 0 and csize2/2, making use of the fact that
635
  // these 2 columns are 'real'.
636
  for(i=0;i<(rsize1+1)/2;i++) {
637
    x1=carr[i][0].real();         x2=carr[i][0].imag();
638
    y1=carr[i][csize2/2].real();  y2=carr[i][csize2/2].imag();
639
    scratch[2*i]     = MyComplex(x1,y1);
640
    scratch[(2*i)+1] = MyComplex(x2,y2);
641
  }
642
  for(i*=2;i<vecsize1;i++) scratch[i]=MyComplex(0.,0.); // Zero pad
643
  fft1.ForwardDecFreq(vecsize1,scratch);
644
  carr[0][0]        = MyComplex(scratch[0].real(),0.);
645
  carr[0][csize2/2] = MyComplex(scratch[0].imag(),0.);
646
  for(i=1;i<csize1-1;i++) {
647
    x1=scratch[i].real()/2;           y1=scratch[i].imag()/2;
648
    x2=scratch[vecsize1-i].real()/2;  y2=scratch[vecsize1-i].imag()/2;
649
    carr[i][0]        = MyComplex(x1+x2,y1-y2);
650
    carr[i][csize2/2] = MyComplex(y1+y2,x2-x1);
651
  }
652
  carr[csize1-1][0]        = MyComplex(scratch[csize1-1].real(),0.);
653
  carr[csize1-1][csize2/2] = MyComplex(scratch[csize1-1].imag(),0.);
654

    
655
  // Do remaining columns
656
  for(j=1;j+1<csize2/2;j+=2) {
657
    for(i=0;i<(rsize1+1)/2;i++) {
658
      x1 =carr[i][j].real()/2;           y1 =carr[i][j].imag()/2;
659
      xb1=carr[i][j+1].real()/2;         yb1=carr[i][j+1].imag()/2;
660
      xb2=carr[i][csize2-1-j].real()/2;  yb2=carr[i][csize2-1-j].imag()/2;
661
      x2 =carr[i][csize2-j].real()/2;    y2 =carr[i][csize2-j].imag()/2;
662
      scratch[2*i]     = MyComplex(x1+x2,y1-y2);
663
      scratch[(2*i)+1] = MyComplex(y1+y2,x2-x1);
664
      scratchb[2*i]     = MyComplex(xb1+xb2,yb1-yb2);
665
      scratchb[(2*i)+1] = MyComplex(yb1+yb2,xb2-xb1);
666
    }
667
    for(i*=2;i<vecsize1;i++)
668
      scratch[i]= scratchb[i]=MyComplex(0.,0.);  // Zero pad
669
    fft1.ForwardDecFreq(vecsize1,scratchb);
670
    fft1.ForwardDecFreq(vecsize1,scratch);
671
    carr[0][j]=scratch[0];     carr[0][csize2-j]=conj(scratch[0]);
672
    carr[0][j+1]=scratchb[0];  carr[0][csize2-1-j]=conj(scratchb[0]);
673
    for(i=1;i<csize1;i++) {
674
      carr[i][j]=scratch[i];
675
      carr[i][j+1]=scratchb[i];
676
      carr[i][csize2-1-j]=conj(scratchb[vecsize1-i]);
677
      carr[i][csize2-j]=conj(scratch[vecsize1-i]);
678
    }
679
  }
680
  // There should be 1 column left over
681
  if(j<csize2/2) {
682
    for(i=0;i<(rsize1+1)/2;i++) {
683
      x1=carr[i][j].real()/2;         y1=carr[i][j].imag()/2;
684
      x2=carr[i][csize2-j].real()/2;  y2=carr[i][csize2-j].imag()/2;
685
      scratch[2*i]     = MyComplex(x1+x2,y1-y2);
686
      scratch[(2*i)+1] = MyComplex(y1+y2,x2-x1);
687
    }
688
    for(i*=2;i<vecsize1;i++) scratch[i]=MyComplex(0.,0.); // Zero pad
689
    fft1.ForwardDecFreq(vecsize1,scratch);
690
    carr[0][j]=scratch[0];   carr[0][csize2-j]=conj(scratch[0]);
691
    for(i=1;i<csize1;i++) {
692
      carr[i][j]=scratch[i];
693
      carr[i][csize2-j]=conj(scratch[vecsize1-i]);
694
    }
695
  }
696
}
697

    
698
void FFTReal2D::InverseRC(int csize1,int csize2,
699
			  const MyComplex* const* carr,
700
			  int rsize1,int rsize2,double** rarr)
701
{
702
  SetupInverse(OC_MAX(1,2*(csize1-1)),csize2); // Safety.
703
  if(vecsize1<2 || vecsize2<2)
704
    PlainError(1,"Error in FFTReal2D::InverseRC(...): "
705
	       "Full array dimensions (%dx%d) must be both >=2",
706
	       vecsize1,vecsize2);
707

    
708
  int i,j;
709
  FFT_REAL_TYPE x1,y1,x2,y2;
710
  FFT_REAL_TYPE xb1,yb1,xb2,yb2;
711

    
712
  // Do row inverse FFT's
713
  // Handle the first & csize1'th row specially.  These rows are
714
  // the DFT's of real sequences, so they each satisfy the conjugate
715
  // symmetry condition
716
  workarr[0][0]=MyComplex(carr[0][0].real(),carr[csize1-1][0].real());
717
  for(j=1;j<csize2/2;j++) {
718
    x1=carr[0][j].real();         y1=carr[0][j].imag();
719
    x2=carr[csize1-1][j].real();  y2=carr[csize1-1][j].imag();
720
    workarr[0][j]        = MyComplex(x1-y2,x2+y1);
721
    workarr[0][csize2-j] = MyComplex(x1+y2,x2-y1);
722
  }
723
  workarr[0][csize2/2]=MyComplex(carr[0][csize2/2].real(),
724
			       carr[csize1-1][csize2/2].real());
725
  fft2.InverseDecTime(csize2,workarr[0],1.);
726

    
727
  // iFFT the remaining rows
728
  for(i=1;i<csize1-1;i++) {
729
    for(j=0;j<csize2;j++) workarr[i][j]=carr[i][j];
730
    fft2.InverseDecTime(csize2,workarr[i],1.);
731
  }
732

    
733
  // Now do iFFT's on columns.  These are conj. symmetric, so we
734
  // process them 2 at a time.  Also, recall the 1st row of workarr
735
  // contains the iFFT's of the 1st and csize1'th row of the given carr.
736
  for(j=0;j+3<rsize2;j+=4) {
737
    scratch[0]=
738
      MyComplex(workarr[0][j].real(),workarr[0][j+1].real());
739
    scratch[csize1-1]=
740
      MyComplex(workarr[0][j].imag(),workarr[0][j+1].imag());
741
    scratchb[0]=
742
      MyComplex(workarr[0][j+2].real(),workarr[0][j+3].real());
743
    scratchb[csize1-1]=
744
      MyComplex(workarr[0][j+2].imag(),workarr[0][j+3].imag());
745
    for(i=1;i<csize1-1;i++) {
746
      x1 =workarr[i][j].real();    y1 =workarr[i][j].imag();
747
      x2 =workarr[i][j+1].real();  y2 =workarr[i][j+1].imag();
748
      xb1=workarr[i][j+2].real();  yb1=workarr[i][j+2].imag();
749
      xb2=workarr[i][j+3].real();  yb2=workarr[i][j+3].imag();
750
      scratch[i]          = MyComplex(x1-y2,x2+y1);
751
      scratch[vecsize1-i] = MyComplex(x1+y2,x2-y1);
752
      scratchb[i]          = MyComplex(xb1-yb2,xb2+yb1);
753
      scratchb[vecsize1-i] = MyComplex(xb1+yb2,xb2-yb1);
754
    }
755
    fft1.InverseDecTime(vecsize1,scratchb,FFT_REAL_TYPE(vecsize1*vecsize2));
756
    fft1.InverseDecTime(vecsize1,scratch,FFT_REAL_TYPE(vecsize1*vecsize2));
757
    for(i=0;i<rsize1;i++) {
758
      rarr[i][j]  =scratch[i].real();
759
      rarr[i][j+1]=scratch[i].imag();
760
      rarr[i][j+2]=scratchb[i].real();
761
      rarr[i][j+3]=scratchb[i].imag();
762
    }
763
  }
764
  // Remaining columns if rsize2 is not divisible by 4.  OTOH, csize2
765
  // *is* divisible by 2, so we can assume workarr[i][j+1] exists.
766
  for(;j<rsize2;j+=2) {
767
    scratch[0]=
768
      MyComplex(workarr[0][j].real(),workarr[0][j+1].real());
769
    scratch[csize1-1]=
770
      MyComplex(workarr[0][j].imag(),workarr[0][j+1].imag());
771
    for(i=1;i<csize1-1;i++) {
772
      x1 =workarr[i][j].real();    y1 =workarr[i][j].imag();
773
      x2 =workarr[i][j+1].real();  y2 =workarr[i][j+1].imag();
774
      scratch[i]          = MyComplex(x1-y2,x2+y1);
775
      scratch[vecsize1-i] = MyComplex(x1+y2,x2-y1);
776
    }
777
    fft1.InverseDecTime(vecsize1,scratch,FFT_REAL_TYPE(vecsize1*vecsize2));
778
    for(i=0;i<rsize1;i++) {
779
      rarr[i][j]  =scratch[i].real();
780
      if(j+1<rsize2) rarr[i][j+1]=scratch[i].imag();
781
    }
782
  }
783
}
784

    
785
void FFTReal2D::InverseCR(int csize1,int csize2,
786
			  const MyComplex* const* carr,
787
			  int rsize1,int rsize2,double** rarr)
788
{
789
  SetupInverse(OC_MAX(1,2*(csize1-1)),csize2); // Safety
790
  if(vecsize1<2 || vecsize2<2)
791
    PlainError(1,"Error in FFTReal2D::InverseCR(...): "
792
	       "Full array dimensions (%dx%d) must be both >=2",
793
	       vecsize1,vecsize2);
794

    
795
  int i,j;
796
  FFT_REAL_TYPE x1,y1,x2,y2,xb1,yb1,xb2,yb2;
797

    
798
  // Column iFFT's
799
  // Handle the first & csize2/2'th column specially.  These cols are
800
  // the DFT's of real sequences, so they each satisfy the conjugate
801
  // symmetry condition
802
  scratch[0]=MyComplex(carr[0][0].real(),carr[0][csize2/2].real());
803
  for(i=1;i<csize1-1;i++) {
804
    x1=carr[i][0].real();         y1=carr[i][0].imag();
805
    x2=carr[i][csize2/2].real();  y2=carr[i][csize2/2].imag();
806
    scratch[i]          = MyComplex(x1-y2,x2+y1);
807
    scratch[vecsize1-i] = MyComplex(x1+y2,x2-y1);
808
  }
809
  scratch[csize1-1]=MyComplex(carr[csize1-1][0].real(),
810
			    carr[csize1-1][csize2/2].real());
811
  fft1.InverseDecTime(vecsize1,scratch,1);
812
  for(i=0;i<vecsize1;i+=2) { // ASSUMES vecsize1 is even
813
    // See packing note below.
814
    workarr[i/2][0]        = MyComplex(scratch[i].real(),scratch[i+1].real());
815
    workarr[i/2][csize2/2] = MyComplex(scratch[i].imag(),scratch[i+1].imag());
816
  }
817
  //
818
  // Do remaining column iFFT's, two at a time for better memory
819
  // access locality.
820
  for(j=1;j+1<csize2/2;j+=2) {
821
    scratch[0]=carr[0][j];
822
    scratchb[0]=carr[0][j+1];
823
    for(i=1;i<csize1-1;i++) {
824
      scratch[i]=carr[i][j];
825
      scratchb[i]=carr[i][j+1];
826
      scratchb[vecsize1-i]=conj(carr[i][csize2-1-j]);
827
      scratch[vecsize1-i]=conj(carr[i][csize2-j]);
828
    }
829
    scratch[csize1-1]=carr[csize1-1][j];
830
    scratchb[csize1-1]=carr[csize1-1][j+1];
831
    fft1.InverseDecTime(vecsize1,scratchb,1.);
832
    fft1.InverseDecTime(vecsize1,scratch,1.);
833
    // Pack into workarr.  Rows will be conjugate symmetric, so we
834
    // can pack two rows into 1 via r[k]+i.r[k+1] -> workarr[k/2].
835
    for(i=0;i<rsize1;i+=2) {
836
      // CAREFUL! The above 'rsize1' bound may depend on how the
837
      // iFFT's are calculated in the 'Row iFFT's' code section,
838
      // and how 'i' is initialized.
839
      x1=scratch[i].real();      y1=scratch[i].imag();
840
      x2=scratch[i+1].real();    y2=scratch[i+1].imag();
841
      xb1=scratchb[i].real();    yb1=scratchb[i].imag();
842
      xb2=scratchb[i+1].real();  yb2=scratchb[i+1].imag();
843
      workarr[i/2][j]          = MyComplex(x1-y2,x2+y1);
844
      workarr[i/2][j+1]        = MyComplex(xb1-yb2,xb2+yb1);
845
      workarr[i/2][csize2-j-1] = MyComplex(xb1+yb2,xb2-yb1);
846
      workarr[i/2][csize2-j]   = MyComplex(x1+y2,x2-y1);
847
    }
848
  }
849
  // There should be 1 column left over
850
  if((j=(csize2/2)-1)%2==1) {
851
    // Column (csize2/2)-1 *not* processed above
852
    scratch[0]=carr[0][j];
853
    for(i=1;i<csize1-1;i++) {
854
      scratch[i]=carr[i][j];
855
      scratch[vecsize1-i]=conj(carr[i][csize2-j]);
856
    }
857
    scratch[csize1-1]=carr[csize1-1][j];
858
    fft1.InverseDecTime(vecsize1,scratch,1);
859
    for(i=0;i<rsize1;i+=2) {
860
      // CAREFUL! The above 'rsize1' bound may depend on how the
861
      // iFFT's are calculated in the 'Row iFFT's' code section,
862
      // and how 'i' is initialized.
863
      x1=scratch[i].real();    y1=scratch[i].imag();
864
      x2=scratch[i+1].real();  y2=scratch[i+1].imag();
865
      workarr[i/2][j]        = MyComplex(x1-y2,x2+y1);
866
      workarr[i/2][csize2-j] = MyComplex(x1+y2,x2-y1);
867
    }
868
  }
869

    
870
  // Row iFFT's
871
  for(i=0;i<rsize1;i+=2) {
872
    fft2.InverseDecTime(vecsize2,workarr[i/2],
873
			FFT_REAL_TYPE(vecsize1*vecsize2));
874
    for(j=0;j<rsize2;j++) rarr[i][j]   = workarr[i/2][j].real();
875
    if(i+1<rsize1) {
876
      for(j=0;j<rsize2;j++) rarr[i+1][j] = workarr[i/2][j].imag();
877
    }
878
  }
879
}
880

    
881
void FFTReal2D::Forward1D(int rsize1,int rsize2,
882
			  const double* const* rarr,
883
			  int csize1,int csize2,MyComplex** carr)
884
{
885
  // The ForwardRC/CR routines assume full array dimensions >1.
886
  // This routine handles the special case where (at least) one of
887
  // the dimension is 1, which degenerates into a simple 1D FFT.
888
  if(csize1==1) { // Single row FFT
889
    int j;
890
    for(j=0;j<rsize2;j++)
891
      carr[0][j]=MyComplex(rarr[0][j],0.);
892
    for(;j<csize2;j++)
893
      carr[0][j]=MyComplex(0.,0.);
894
    fft2.ForwardDecFreq(csize2,carr[0]);
895
  } else if(csize2==1) { // Single column FFT
896
    int i;
897
    for(i=0;i<rsize1;i++)
898
      scratch[i]=MyComplex(rarr[i][0],0.0);
899
    for(;i<vecsize1;i++)
900
      scratch[i]=MyComplex(0.0,0.0);
901
    fft1.ForwardDecFreq(vecsize1,scratch);
902
    for(i=0;i<csize1;i++)
903
      carr[i][0]=scratch[i]; // Last half, from csize1 to vecsize1
904
                            /// is conj. sym. since input is real.
905
  } else {
906
    PlainError(1,"Error in FFTReal2D::Forward1D(...): "
907
	       "One array dimension (of %dx%d) must be==1",
908
	       vecsize1,vecsize2);
909
  }
910
}
911

    
912
void FFTReal2D::Inverse1D(int csize1,int csize2,
913
			  const MyComplex* const* carr,
914
			  int rsize1,int rsize2,double** rarr)
915
{
916
  // The InverseRC/CR routines assume full array dimensions >1.
917
  // This routine handles the special case where (at least) one of
918
  // the dimension is 1, which degenerates into a simple 1D FFT.
919
  if(csize1==1) { // Single row iFFT
920
    int j;
921
    for(j=0;j<csize2;j++)
922
      scratch[j]=carr[0][j];
923
    fft2.InverseDecTime(csize2,scratch);
924
    for(j=0;j<rsize2;j++)
925
      rarr[0][j]=scratch[j].real();
926
  } else if(csize2==1) { // Single column iFFT
927
    int i;
928
    scratch[0]=carr[0][0];
929
    for(i=1;i<csize1-1;i++) {
930
      scratch[i]=carr[i][0];
931
      scratch[vecsize1-i]=conj(carr[i][0]); // Last half obtained
932
      /// by using conjugate symmetry of real data FFT.
933
    }
934
    scratch[csize1-1]=carr[csize1-1][0];
935
    fft2.InverseDecTime(vecsize1,scratch);
936
    for(i=0;i<rsize1;i++)
937
      rarr[i][0]=scratch[i].real();
938
  } else {
939
    PlainError(1,"Error in FFTReal2D::Inverse1D(...): "
940
	       "One array dimension (of %dx%d) must be==1",
941
	       vecsize1,vecsize2);
942
  }
943
}
944

    
945
void FFTReal2D::Forward(int rsize1,int rsize2,
946
			const double* const* rarr,
947
			int csize1,int csize2,MyComplex** carr)
948
{
949
  if(csize2<rsize2) 
950
    PlainError(1,"Error in FFTRealD::Forward(int,int,OC_REAL8,**,int,int,"
951
	       "MyComplex**): csize2 (=%d) *must* be >= rsize2 (=%d)\n",
952
	       csize2,rsize2);
953

    
954
  if(csize1<(rsize1/2)+1)
955
    PlainError(1,"Error in FFTRealD::Forward(int,int,double**,int,int,"
956
	       "MyComplex**): csize1 (=%d) *must* be >= (rsize1/2)+1"
957
	       " (=%d)\n",csize1,(rsize1/2)+1);
958

    
959
  Setup(OC_MAX(1,2*(csize1-1)),csize2);
960
  if(vecsize1<1 || vecsize2<1) return; // Nothing to do
961

    
962
  // Check for 1D degenerate cases
963
  if(vecsize1==1 || vecsize2==1) {
964
    Forward1D(rsize1,rsize2,rarr,csize1,csize2,carr);
965
  } else {
966
    // Determine which Forward routine to call (ForwardCR or ForwardRC)
967
    // (Use double arithmetic to protect against integer overflow.  If
968
    // the two times are very close, then the choice doesn't really
969
    // matter.)
970
    // 1) Estimated (proportional) time for ForwardCR
971
    REALWIDE crtime=REALWIDE(vecsize1*vecsize2)*REALWIDE(logsize2)
972
      + REALWIDE(vecsize1*rsize2)*REALWIDE(logsize1);
973
    // 2) Estimated (proportional) time for ForwardRC
974
    REALWIDE rctime=REALWIDE(rsize1*vecsize2)*REALWIDE(logsize2)
975
      + REALWIDE(vecsize1*vecsize2)*REALWIDE(logsize1);
976
    // Introduce empirical adjustment factor
977
    rctime*=CRRCspeedratio;
978
    if(crtime<=rctime) ForwardCR(rsize1,rsize2,rarr,csize1,csize2,carr);
979
    else               ForwardRC(rsize1,rsize2,rarr,csize1,csize2,carr);
980
  }
981
}
982

    
983
void FFTReal2D::Inverse(int csize1,int csize2,
984
			const MyComplex* const* carr,
985
			int rsize1,int rsize2,double** rarr)
986
{
987
  if(csize2<rsize2) 
988
    PlainError(1,"Error in FFTRealD::Inverse(int,int,double**,"
989
	       "int,int,MyComplex**): csize2 (=%d) *must* be >="
990
	       " rsize2 (=%d)\n",csize2,rsize2);
991

    
992
  if(csize1<(rsize1/2)+1) 
993
    PlainError(1,"Error in FFTRealD::Inverse(int,int,double**,"
994
	       "int,int,MyComplex**): csize1 (=%d) *must* be >="
995
	       " (rsize1/2)+1 (=%d)\n",csize1,(rsize1/2)+1);
996

    
997
  SetupInverse(OC_MAX(1,2*(csize1-1)),csize2);
998
  if(vecsize1<1 || vecsize2<1) return; // Nothing to do
999

    
1000
  // Check for 1D degenerate cases
1001
  if(vecsize1==1 || vecsize2==1) {
1002
    Inverse1D(csize1,csize2,carr,rsize1,rsize2,rarr);
1003
  } else {
1004
    // Determine which Inverse routine to call (InverseRC or InverseCR)
1005
    // (Use double arithmetic to protect against integer overflow.
1006
    // If the two times are very close, then the choice doesn't really
1007
    // matter.)
1008
    // 1) Estimated (proportional) time for InverseRC (==ForwardCR)
1009
    REALWIDE irctime=REALWIDE(vecsize1*vecsize2)*REALWIDE(logsize2)
1010
      + REALWIDE(vecsize1*rsize2)*REALWIDE(logsize1);
1011
    // 2) Estimated (proportional) time for InverseCR (==ForwardRC)
1012
    REALWIDE icrtime=REALWIDE(rsize1*vecsize2)*REALWIDE(logsize2)
1013
      + REALWIDE(vecsize1*vecsize2)*REALWIDE(logsize1);
1014
    // Introduce empirical adjustment factor
1015
    icrtime*=CRRCspeedratio;
1016
    if(irctime<=icrtime) InverseRC(csize1,csize2,carr,rsize1,rsize2,rarr);
1017
    else                 InverseCR(csize1,csize2,carr,rsize1,rsize2,rarr);
1018
  }
1019
}
1020

    
1021

    
1022
#ifdef USE_MPI
1023

    
1024
static FFT fft1_mpi,fft2_mpi;
1025
static int vecsize1_mpi(0),vecsize2_mpi(0);
1026
static MyComplex* scratch_mpi(NULL);
1027
static MyComplex** workarr_mpi(NULL);
1028

    
1029
FFTReal2D_mpi::FFTReal2D_mpi()
1030
{
1031
}
1032

    
1033
void SetupMemory_mpi(int size1,int size2)
1034
{
1035
  if(size1!=vecsize1_mpi) {
1036
    if(scratch_mpi!=NULL) delete[] scratch_mpi;
1037
    vecsize1_mpi=size1;
1038
    scratch_mpi=new MyComplex[vecsize1_mpi];
1039
  }
1040
  if(size2!=vecsize2_mpi) {
1041
    if(workarr_mpi!=NULL) {
1042
      delete[] workarr_mpi[0];
1043
      delete[] workarr_mpi;
1044
    }
1045
    vecsize2_mpi=size2;
1046
    int rowcount=(vecsize1_mpi/2)+1;
1047
    workarr_mpi=new MyComplex*[rowcount];
1048
    workarr_mpi[0]=new MyComplex[rowcount*vecsize2_mpi];
1049
    for(int i=1;i<rowcount;i++)
1050
      workarr_mpi[i]=workarr_mpi[i-1]+vecsize2_mpi;
1051
  }
1052
}
1053

    
1054
void ReleaseMemory_mpi()
1055
{
1056
  fft1_mpi.ReleaseMemory();
1057
  fft2_mpi.ReleaseMemory();
1058
  if(scratch_mpi!=NULL) delete[] scratch_mpi;
1059
  scratch_mpi=NULL;
1060
  vecsize1_mpi=0;
1061
  if(workarr_mpi!=NULL) {
1062
    delete[] workarr_mpi[0];
1063
    delete[] workarr_mpi;
1064
  }
1065
  workarr_mpi=NULL;
1066
  vecsize2_mpi=0;
1067
}
1068

    
1069
void FFTReal2D_mpi::ReleaseMemory()
1070
{
1071
  Mmsolve_MpiWakeUp(ReleaseMemory_mpi);  // On slaves
1072
  ReleaseMemory_mpi();                   // On master
1073
}
1074

    
1075
static int vecsize1_mpi_b(0),vecsize2_mpi_b(0);
1076
static MyComplex **work1_mpi(NULL),**work2_mpi(NULL);
1077

    
1078
static void SetupMemory_mpi_base_b(int size1,int size2)
1079
{
1080
  int i;
1081
  if(size1!=vecsize1_mpi_b || size2!=vecsize2_mpi_b) {
1082
    if(work1_mpi!=NULL) {
1083
      delete[] work1_mpi[0];
1084
      delete[] work1_mpi;
1085
    }
1086
    if(work2_mpi!=NULL) {
1087
      delete[] work2_mpi[0];
1088
      delete[] work2_mpi;
1089
    }
1090
    vecsize1_mpi_b=size1;
1091
    vecsize2_mpi_b=size2;
1092
    work1_mpi=new MyComplex*[vecsize1_mpi_b];
1093
    work1_mpi[0]=new MyComplex[vecsize1_mpi_b*vecsize2_mpi_b];
1094
    for(i=1;i<vecsize1_mpi_b;i++)
1095
      work1_mpi[i]=work1_mpi[i-1]+vecsize2_mpi_b;
1096
    int rowcount=vecsize2_mpi_b/2;
1097
    work2_mpi=new MyComplex*[rowcount];
1098
    work2_mpi[0]=new MyComplex[rowcount*vecsize1_mpi_b];
1099
    for(i=1;i<rowcount;i++)
1100
      work2_mpi[i]=work2_mpi[i-1]+vecsize1_mpi_b;
1101
  }
1102
}
1103

    
1104
static void SetupMemory_mpi_slave_b()
1105
{
1106
  int size1,size2;
1107
  MPI_Bcast(&size1,1,MPI_INT,0,MPI_COMM_WORLD);
1108
  MPI_Bcast(&size2,1,MPI_INT,0,MPI_COMM_WORLD);
1109
  if(size1<1 || size2<1)
1110
    PlainError(1,"Error propagating array size info in "
1111
	       "SetupMemory_mpi_slave_b(): size1=%d, size2=%d",
1112
	       size1,size2);
1113
  SetupMemory_mpi_base_b(size1,size2);
1114
}
1115

    
1116
static void SetupMemory_mpi_master_b(int size1,int size2)
1117
{
1118
  Mmsolve_MpiWakeUp(SetupMemory_mpi_slave_b);
1119
  MPI_Bcast(&size1,1,MPI_INT,0,MPI_COMM_WORLD);
1120
  MPI_Bcast(&size2,1,MPI_INT,0,MPI_COMM_WORLD);
1121
  SetupMemory_mpi_base_b(size1,size2);
1122
}
1123

    
1124
void ForwardFFT1_mpi_slave_b()
1125
{
1126
  // Get data
1127
  int rowcount,colcount;
1128
  colcount=vecsize2_mpi_b;
1129
  MPI_Request request[1];
1130
  MPI_Status status[1];
1131
  MPI_Irecv(&rowcount,1,MPI_INT,0,1,MPI_COMM_WORLD,request);
1132
  MPI_Waitall(1,request,status);
1133
  MPI_Irecv(work1_mpi[0],rowcount*colcount,MMS_COMPLEX,0,2,
1134
	   MPI_COMM_WORLD,request);
1135
  MPI_Waitall(1,request,status);
1136

    
1137
  // Transform rows
1138
  for(int i=0;i<rowcount;i++) {
1139
    fft1_mpi.ForwardDecFreq(colcount,work1_mpi[i]);
1140
  }
1141

    
1142
  // Return results
1143
  MPI_Isend(work1_mpi[0],rowcount*colcount,MMS_COMPLEX,0,3,
1144
	   MPI_COMM_WORLD,request);
1145
  MPI_Waitall(1,request,status);
1146
}
1147

    
1148
void ForwardFFT2_mpi_slave_b()
1149
{
1150
  // Get data
1151
  int rowcount,colcount;
1152
  colcount=vecsize1_mpi_b;
1153
  MPI_Request request[1];
1154
  MPI_Status status[1];
1155
  MPI_Irecv(&rowcount,1,MPI_INT,0,1,MPI_COMM_WORLD,request);
1156
  MPI_Waitall(1,request,status);
1157
  MPI_Irecv(work2_mpi[0],rowcount*colcount,MMS_COMPLEX,0,2,
1158
	   MPI_COMM_WORLD,request);
1159
  MPI_Waitall(1,request,status);
1160

    
1161
  // Transform rows
1162
  for(int i=0;i<rowcount;i++) {
1163
    fft2_mpi.ForwardDecFreq(colcount,work2_mpi[i]);
1164
  }
1165

    
1166
  // Return results
1167
  MPI_Isend(work2_mpi[0],rowcount*colcount,MMS_COMPLEX,0,3,
1168
	   MPI_COMM_WORLD,request);
1169
  MPI_Waitall(1,request,status);
1170
}
1171

    
1172
void FFTReal2D_mpi::Forward(int rsize1,int rsize2,
1173
			    const double* const* rarr,
1174
			    int csize1,int csize2,MyComplex** carr)
1175
{ // Computes FFT of rsize1 x rsize2 double** rarr, leaving result in
1176
  // csize1 x csize2 MyComplex** carr.  rsize2 *must* be <= csize2, and
1177
  // rsize1 *must* be <= 2*(csize1-1).  This routine returns only the
1178
  // top half +1 of the transform.  The bottom half is given by
1179
  //      carr[2*(csize1-1)-i][csize2-j]=conj(carr[i][j])
1180
  // for i>=csize1, with the second indices interpreted 'mod csize2'.
1181
  //    The dimensions csize2 and 2*(csize1-1) *must* be powers of 2,
1182
  // but rsize1 and rsize2 do not.  On import, the rarr will be
1183
  // implicitly zero-padded as necessary to fit into the specified
1184
  // output array carr.  To get a non-periodic transform, set
1185
  //
1186
  //    2*(csize1-1) to 2*(first power of 2 >= rsize1), and
1187
  //       csize2    to 2*(first power of 2 >= rsize2).
1188

    
1189
  int i,j;
1190
  FFT_REAL_TYPE x1,y1,x2,y2;
1191
  int vecsize1=OC_MAX(1,2*(csize1-1));
1192
  int vecsize2=csize2;
1193
  for(i=vecsize1;i>2;i/=2)
1194
    if(i%2!=0) PlainError(1,"Error in FFTReal2D_mpi::Forward(int): "
1195
			  "Requested csize1 - 1 (%d - 1) is not a power of 2",
1196
			  csize1);
1197
  for(j=vecsize2;j>2;j/=2)
1198
    if(j%2!=0) PlainError(1,"Error in FFTReal2D_mpi::Forward(int): "
1199
			  "Requested csize2 (%d) is not a power of 2",
1200
			  csize2);
1201
  if(vecsize1==0 || vecsize2==0) return; // Nothing to do
1202
  SetupMemory_mpi_master_b(vecsize1,vecsize2);
1203

    
1204
  // Copy input data into packed complex array
1205
  for(i=0;i<rsize1/2;i++) {
1206
    for(j=0;j<rsize2;j++)
1207
      work1_mpi[i][j]=MyComplex(rarr[2*i][j],rarr[2*i+1][j]);
1208
    for(;j<vecsize2;j++)
1209
      work1_mpi[i][j]=MyComplex(0,0);  // Zero pad
1210
  }
1211
  if(2*i<rsize1) {
1212
    // Odd number of rows.  Pack last with zeros.
1213
    for(j=0;j<rsize2;j++)
1214
      work1_mpi[i][j]=MyComplex(rarr[2*i][j],0.0);
1215
    for(;j<vecsize2;j++)
1216
      work1_mpi[i][j]=MyComplex(0,0);  // Zero pad
1217
  }
1218

    
1219
  // Do FFT across rows
1220
  int proc,proc_count,base_size,base_orphan,whole_size,chunk_size;
1221
  proc_count=mms_mpi_size;
1222
  Mmsolve_MpiWakeUp(ForwardFFT1_mpi_slave_b);
1223
  whole_size=(rsize1+1)/2;
1224
  base_size=whole_size/proc_count;
1225
  base_orphan=whole_size%proc_count;
1226
  i=0;
1227
  chunk_size=base_size+(0<base_orphan?1:0);
1228
  int msg_count=3*(proc_count-1);
1229
  MPI_Request *request=new MPI_Request[msg_count];
1230
  MPI_Status  *status=new MPI_Status[msg_count];
1231
  for(proc=1;proc<proc_count;proc++) {
1232
    i+=chunk_size;
1233
    chunk_size=base_size+(proc<base_orphan?1:0);
1234
    MPI_Isend(&chunk_size,1,MPI_INT,proc,1,MPI_COMM_WORLD,
1235
	      request+3*(proc-1));
1236
    MPI_Isend(work1_mpi[i],chunk_size*vecsize2,MMS_COMPLEX,proc,2,
1237
	     MPI_COMM_WORLD,request+3*(proc-1)+1);
1238
    MPI_Irecv(work1_mpi[i],chunk_size*vecsize2,MMS_COMPLEX,proc,3,
1239
	     MPI_COMM_WORLD,request+3*(proc-1)+2);
1240
  }
1241
  chunk_size=base_size+(0<base_orphan?1:0);
1242
  for(i=0;i<chunk_size;i++) {
1243
    fft1_mpi.ForwardDecFreq(vecsize2,work1_mpi[i]);
1244
  }
1245
  MPI_Waitall(msg_count,request,status);
1246

    
1247
  // Unpack and transpose to prepare for FFT in cross dimension.
1248
  //   We fill the first row of work2_mpi with the first and middle
1249
  // columns from unpacked work1_mpi, because these are real-valued.
1250
  for(i=0;i<(rsize1+1)/2;i++) {
1251
    work2_mpi[0][2*i]
1252
      =MyComplex(work1_mpi[i][0].real(),work1_mpi[i][vecsize2/2].real());
1253
    work2_mpi[0][2*i+1]
1254
      =MyComplex(work1_mpi[i][0].imag(),work1_mpi[i][vecsize2/2].imag());
1255
  }
1256
  for(i=2*i;i<vecsize1;i++) work2_mpi[0][i]=MyComplex(0.,0.); // Zero pad
1257
  // Process rest of the rows.
1258
  for(j=1;j<vecsize2/2;j++) {
1259
    for(i=0;i<(rsize1+1)/2;i++) {
1260
      x1=work1_mpi[i][j].real()/2.;
1261
      y1=work1_mpi[i][j].imag()/2.;
1262
      x2=work1_mpi[i][vecsize2-j].real()/2.;
1263
      y2=work1_mpi[i][vecsize2-j].imag()/2.;
1264
      work2_mpi[j][2*i]=MyComplex(x1+x2,y1-y2);
1265
      work2_mpi[j][2*i+1]=MyComplex(y1+y2,x2-x1);
1266
    }
1267
    for(i=2*i;i<vecsize1;i++) work2_mpi[j][i]=MyComplex(0.,0.); // Zero pad
1268
  }
1269

    
1270

    
1271
  // Do FFT's on transposed matrix
1272
  Mmsolve_MpiWakeUp(ForwardFFT2_mpi_slave_b);
1273
  whole_size=vecsize2/2;
1274
  base_size=whole_size/proc_count;
1275
  base_orphan=whole_size%proc_count;
1276
  i=0;
1277
  chunk_size=base_size+(0<base_orphan?1:0);
1278
  msg_count=3*(proc_count-1);
1279
  for(proc=1;proc<proc_count;proc++) {
1280
    i+=chunk_size;
1281
    chunk_size=base_size+(proc<base_orphan?1:0);
1282
    MPI_Isend(&chunk_size,1,MPI_INT,proc,1,MPI_COMM_WORLD,
1283
	      request+3*(proc-1));
1284
    MPI_Isend(work2_mpi[i],chunk_size*vecsize1,MMS_COMPLEX,proc,2,
1285
	     MPI_COMM_WORLD,request+3*(proc-1)+1);
1286
    MPI_Irecv(work2_mpi[i],chunk_size*vecsize1,MMS_COMPLEX,proc,3,
1287
	     MPI_COMM_WORLD,request+3*(proc-1)+2);
1288
  }
1289
  chunk_size=base_size+(0<base_orphan?1:0);
1290
  for(i=0;i<chunk_size;i++) {
1291
    fft2_mpi.ForwardDecFreq(vecsize1,work2_mpi[i]);
1292
  }
1293
  MPI_Waitall(msg_count,request,status);
1294
  delete[] request;
1295
  delete[] status;
1296

    
1297

    
1298
  // Un-transpose and pack into carr.
1299
  //  First row of work2_mpi needs to be handled separately, because
1300
  // of unique packing described above.
1301
  carr[0][0]=MyComplex(work2_mpi[0][0].real(),0.);
1302
  carr[0][vecsize2/2]=MyComplex(work2_mpi[0][0].imag(),0.);
1303
  carr[vecsize1/2][0]=MyComplex(work2_mpi[0][vecsize1/2].real(),0.);
1304
  carr[vecsize1/2][vecsize2/2]=MyComplex(work2_mpi[0][vecsize1/2].imag(),0.);
1305
  for(i=1;i<vecsize1/2;i++) {
1306
    x1=work2_mpi[0][i].real()/2.;
1307
    y1=work2_mpi[0][i].imag()/2.;
1308
    x2=work2_mpi[0][vecsize1-i].real()/2.;
1309
    y2=work2_mpi[0][vecsize1-i].imag()/2.;
1310
    carr[i][0]=MyComplex(x1+x2,y1-y2);
1311
    carr[i][vecsize2/2]=MyComplex(y1+y2,x2-x1);
1312
  }
1313
  // Process remaining rows
1314
  for(j=1;j<vecsize2/2;j++) {
1315
    carr[0][j]=work2_mpi[j][0];
1316
    carr[0][csize2-j]=conj(work2_mpi[j][0]);
1317
    for(i=1;i<vecsize1/2;i++)
1318
      carr[i][j]=work2_mpi[j][i];
1319
    carr[i][j]=work2_mpi[j][i];
1320
    carr[i][csize2-j]=conj(work2_mpi[j][i]);
1321
    for(i=csize1;i<vecsize1;i++)
1322
      carr[vecsize1-i][csize2-j]=conj(work2_mpi[j][i]);
1323
  }
1324
}
1325

    
1326
void FFTReal2D_mpi::Inverse(int csize1,int csize2,
1327
			    const MyComplex* const* carr,
1328
			    int rsize1,int rsize2,double** rarr)
1329
{
1330
  // Initialization
1331
  int i,j;
1332
  int vecsize1=OC_MAX(1,2*(csize1-1));
1333
  int vecsize2=csize2;
1334
  FFT_REAL_TYPE x1,y1,x2,y2; // FFT unpacking scratch vars
1335
  for(i=vecsize1;i>2;i/=2)
1336
    if(i%2!=0) PlainError(1,"Error in FFTReal2D_mpi::Inverse(int): "
1337
			  "Requested csize1 - 1 (%d - 1) is not a power of 2",
1338
			  csize1);
1339
  for(j=vecsize2;j>2;j/=2)
1340
    if(j%2!=0) PlainError(1,"Error in FFTReal2D_mpi::Inverse(int): "
1341
			  "Requested csize2 (%d) is not a power of 2",
1342
			  csize2);
1343
  if(vecsize1==0 || vecsize2==0) return; // Nothing to do
1344
  SetupMemory_mpi(vecsize1,vecsize2);
1345

    
1346

    
1347
  // Do row inverse FFT's
1348
  // Handle the first & csize1'th row specially.  These rows are
1349
  // the DFT's of real sequences, so they each satisfy the conjugate
1350
  // symmetry condition
1351
  workarr_mpi[0][0]=MyComplex(carr[0][0].real(),carr[csize1-1][0].real());
1352
  for(j=1;j<csize2/2;j++) {
1353
    x1=carr[0][j].real();         y1=carr[0][j].imag();
1354
    x2=carr[csize1-1][j].real();  y2=carr[csize1-1][j].imag();
1355
    workarr_mpi[0][j]        = MyComplex(x1-y2,x2+y1);
1356
    workarr_mpi[0][csize2-j] = MyComplex(x1+y2,x2-y1);
1357
  }
1358
  workarr_mpi[0][csize2/2]=MyComplex(carr[0][csize2/2].real(),
1359
				     carr[csize1-1][csize2/2].real());
1360
  fft2_mpi.InverseDecTime(csize2,workarr_mpi[0],1.);
1361

    
1362
  // iFFT the remaining rows
1363
  for(i=1;i<csize1-1;i++) {
1364
    for(j=0;j<csize2;j++) workarr_mpi[i][j]=carr[i][j];
1365
    fft2_mpi.InverseDecTime(csize2,workarr_mpi[i],1.);
1366
  }
1367

    
1368
  // Now do iFFT's on columns.  These are conj. symmetric, so we
1369
  // process them 2 at a time.  Also, recall the 1st row of workarr
1370
  // contains the iFFT's of the 1st and csize1'th row of the given carr.
1371
  //   Note that csize is guaranteed divisible by 2, so if rsize2 is odd
1372
  // then rsize2+1<=csize2.
1373
  for(j=0;j<rsize2;j+=2) {
1374
    scratch_mpi[0]=
1375
      MyComplex(workarr_mpi[0][j].real(),workarr_mpi[0][j+1].real());
1376
    scratch_mpi[csize1-1]=
1377
      MyComplex(workarr_mpi[0][j].imag(),workarr_mpi[0][j+1].imag());
1378
    for(i=1;i<csize1-1;i++) {
1379
      x1 =workarr_mpi[i][j].real();
1380
      y1 =workarr_mpi[i][j].imag();
1381
      x2 =workarr_mpi[i][j+1].real();
1382
      y2 =workarr_mpi[i][j+1].imag();
1383
      scratch_mpi[i]          = MyComplex(x1-y2,x2+y1);
1384
      scratch_mpi[vecsize1-i] = MyComplex(x1+y2,x2-y1);
1385
    }
1386
    fft1_mpi.InverseDecTime(vecsize1,scratch_mpi,
1387
			    FFT_REAL_TYPE(vecsize1*vecsize2));
1388
    if(j+1<rsize2) {
1389
      for(i=0;i<rsize1;i++) {
1390
	rarr[i][j]=scratch_mpi[i].real();
1391
	rarr[i][j+1]=scratch_mpi[i].imag();
1392
      }
1393
    } else {
1394
      for(i=0;i<rsize1;i++) rarr[i][j]=scratch_mpi[i].real();
1395
    }
1396
  }
1397
}
1398

    
1399

    
1400
#endif /* USE_MPI */
Redmine Appliance - Powered by TurnKey Linux