/* [+MEQ MatlabEQuilibrium Toolbox+] Swiss Plasma Center EPFL Lausanne 2022. All rights reserved. */
# include "meq.h"
/* Exported function name: sgszr when SINGLE is defined, dgszr otherwise. */
# ifdef SINGLE
# define FCT sgszr
# else
# define FCT dgszr
# endif
/* Linear index of element (i,j) in a column-major array with leading dimension n. */
# define IND(i, j, n) ((i)+(n)*(j))
/* Address of element (i,j) relative to base pointer v. The leading dimension
 * (nz for GIND, nr2 for CIND) is taken from the scope where the macro expands.
 * v is parenthesized so that any pointer expression can be passed safely. */
# define GIND(v, i, j) ((v) + IND(i, j, nz))
# define CIND(v, i, l) ((v) + IND(i, l, nr2))

/* function p = gsp(p,cq,cr,cs,nr1,l,ip)
 * pp = p(ip,nr2);
 * for j = nr1-2:-1:1
 * pp = p(ip,j) + cr(j,l)*pp;
 * p(ip,j) = pp;
 * end
 * pp = cq(1,l)*p(ip,1);
 * p(ip,1) = pp;
 * for j = 2:nr2
 * pp = cq(j,l)*p(ip,j) + cs(j,l)*pp;
 * p(ip,j) = pp;
 * end
 * end */
/* GSP: C form of gsp() above, applied in place to nr2 consecutive values.
 * On entry pi must point one past the last of these values; on exit it points
 * there again, so GSP can be repeated for several l.
 * Uses pi, j, l, nr2, cq, cr and cs from the scope where it expands.
 * Wrapped in do { } while (0) so that `GSP;` is exactly one statement. */
# define GSP \
do { \
  FLT *crl = CIND(cr, nr2-1, l), *cql = CIND(cq, 0, l), *csl = CIND(cs, 1, l); \
  FLT pp = *--pi; \
  for (j = nr2; --j; ) { --pi; --crl; *pi += *crl * pp; pp = *pi; } \
  pp = *cql++ * *pi; *pi++ = pp; \
  for (j = nr2; --j; pi++) { pp = *cql++ * *pi + *csl++ * pp; *pi = pp; } \
} while (0)
/* function Fx = gsu(Fx,p,nr1,i,ip)
 * j = 2:nr1;
 * Fx(i,j) = Fx(i,j) + p(ip,:);
 * end */
/* GSU: C form of gsu() above. Adds nr2 consecutive values, read backwards from
 * pi, to row i of Fx in columns nr2 down to 1 (0-based); pi ends at the first
 * of these values.
 * Uses Fx, i, j, f, pi, nz and nr2 from the scope where it expands.
 * Wrapped in do { } while (0) so that `GSU;` is exactly one statement. */
# define GSU \
do { \
  for (j = nr2, f = GIND(Fx, i, nr2); j--; f -= nz) *f += *--pi; \
} while (0)

/* function Fx = gszr(Fb,Jy,cx,cq,cr,cs,ci,co,dz) */
/* meq.h */
/* Fill the nz-by-nr column-major array Fx from the boundary values Fb and the
 * interior values Jy, then update its interior rows with the GSP/GSU sweeps
 * below (each commented block gives the reference MATLAB form).
 *
 * Fx          output, nz*nr values, element (i,j) stored at Fx[i + nz*j]
 * Fb          boundary values, read in this order: nz values for column 0,
 *             then 2*(nr-2) values alternating row 0 / row nz-1 for columns
 *             1..nr-2, then nz values for column nr-1
 * Jy          (nz-2)*(nr-2) interior values, column-major
 * cx          nr-2 per-column factors multiplied into Jy
 * cq, cr, cs  coefficient tables read by GSP, nr-2 values per index l
 * ci, co      weights with which the Fb values of column 0 / column nr-1 are
 *             added into column 1 / column nr-2
 * p           scratch storage; nr-2 values are used
 * nz, nr      dimensions of Fx
 * dz          when non-zero, dz times a finite-difference estimate of dFx/dz
 *             is added to every element at the end
 *
 * NOTE(review): the stride arithmetic (shifts of nz-1) looks like it expects
 * nz-1 to be a power of two, and the loops expect nr >= 3 -- confirm against
 * the callers. */
void FCT(FLT *Fx, FLT *Fb, FLT *Jy, FLT *cx,
        FLT *cq, FLT *cr, FLT *cs, FLT ci, FLT co,
        FLT *p, int nz, int nr, FLT dz)
{
 /* [nz,nr] = size(Jy); nz = nz+2; nr = nr+2; nz1 = nz-1; nr1 = nr-1; nr2 = nr1-1;
  * p = zeros(nz1-1,nr2);
  * Fx = zeros(nz,nr); */
 int    nz1 = nz-1;
 int    nr1 = nr-1, nr2 = nr1-1;
 int    i, j, l;
 FLT *f, *pi;
 /* i = 2:nz1; j = 2:nr1;
  * Fx(i,j) = repmat(cx',nz-2,1).*Jy; */
 { FLT c, *Jp = Jy, *cxp = cx;
   f = GIND(Fx, 1, 1);
   for (j = nr1; --j;) {
    c = *cxp++;
    for (i = nz1; --i; ) *f++ = *Jp++ * c;
    f += 2; /* skip last row of this column and first row of the next */
   }
 }
 /* i = 1:nz;
  * Fx(i,1 )  = Fb(i);
  * Fx(i,nr)  = Fb(i+nz+2*nr2);
  * Fx(i,2 )  = Fx(i,2  ) + ci*Fb(i);
  * Fx(i,nr1) = Fx(i,nr1) + co*Fb(i+nz+2*nr2);
  * j = 2:nr1;
  * Fx( 1,j) = Fb(nz+(1:2:2*nr2));
  * Fx(nz,j) = Fb(nz+(2:2:2*nr2));*/
 { FLT *f1 = Fx, *fb1 = Fb, *fnr = GIND(Fx, 0, nr1), *fbnr = Fb + nz + 2*nr2, *f2 = GIND(Fx, 0, 1), *fnr1 = GIND(Fx, 0, nr2);
   /* Rows 0 and nz-1 of columns 1 and nr-2 are also touched here; they are
    * overwritten by the next loop. */
   for (i = nz; i--;) {
    *f1++    = *fb1        ;
    *fnr++   = *fbnr       ;
    *f2++   += *fb1++  * ci;
    *fnr1++ += *fbnr++ * co;
   }
 }
 { FLT *f1 = GIND(Fx, 0, 1), *fnz = GIND(Fx, nz1, 1), *fb = Fb + nz;
   for (j = nr2; j--;) {
    *f1  = *fb++; f1  += nz;
    *fnz = *fb++; fnz += nz;
   }
 }
 /* The four following loops on i can be parallelised, but that requires independent p storage,
  * size nz-2 x nr-2, and pi = p + (i-1)*nr2 */
 /* l = nz1/2;
  * i = 3:2:nz1;
  * ip = i-1;
  * j = 2:nr1;
  * p(ip,:) = 2.*Fx(i,j);
  * Fx(i,j) = Fx(i+1,j)+Fx(i-1,j);
  * p = gsp(p,cq,cr,cs,nr1,l,ip);
  * Fx = gsu(Fx,p,nr1,i,ip); */
 { l = (nz1>>1) - 1;
   for (i = 2; i < nz1; i += 2) {
    pi = p;
    f = GIND(Fx, i, 1);
    for (j = nr2; j--; f += nz) {
     *pi++ = FLTC(2.0) * *f;
     *f = *(f+1) + *(f-1);
    }
    GSP;
    GSU;
   }
 }
 /* lo = nz1/4; li = lo*2; id = 2; ih = 1; io = 4;
  * while lo > 1
  * it = id+ih;
  * i = io+1:io:nz1;
  * ip = i-1;
  * j = 2:nr1;
  * p(ip,:) = Fx(i,j)-Fx(i+it,j)-Fx(i-it,j);
  * Fx(i,j) = Fx(i,j)-Fx(i+ih,j)-Fx(i-ih,j)+Fx(i+id,j)+Fx(i-id,j);
  * p(ip,:) = p(ip,:)+Fx(i,j);
  * for l = lo:li:nz1
  * p = gsp(p,cq,cr,cs,nr1,l,ip);
  * end
  * Fx = gsu(Fx,p,nr1,i,ip);
  * id = id*2; ih = ih*2; io = io*2; lo = lo/2; li = li/2;
  * end */
 /* These stride variables carry over into the two blocks that follow. */
 int li = nz1>>1, lo = li>>1, id = 2, ih = 1, io = 4;
 { int ii;
   while (lo > 1) {
    ii = id+ih;
    for (i = io; i < nz1; i += io) {
     pi = p;
     f = GIND(Fx, i, 1);
     for (j = nr2; j--; f += nz) {
      *pi = *f - *(f+ii) - *(f-ii);
      *f += *(f+id) + *(f-id) - *(f+ih) - *(f-ih);
      *pi++ += *f;
     }
     for (l = lo-1; l < nz1; l += li) GSP;
     GSU;
    }
    id <<= 1; ih <<= 1; io <<= 1; lo >>= 1; li >>= 1;
   }
 }
 /* ii = io; io = id;
  * while ih >= 1
  * i = io+1:ii:nz1;
  * ip = i-1;
  * j = 2:nr1;
  * p(ip,:) = 2*Fx(i,j)+Fx(i+id,j)+Fx(i-id,j);
  * Fx(i,j) = Fx(i,j)-Fx(i+ih,j)-Fx(i-ih,j);
  * for l = lo:li:nz1
  * p = gsp(p,cq,cr,cs,nr1,l,ip);
  * end
  * Fx = gsu(Fx,p,nr1,i,ip);
  * id = id/2; ih = ih/2; io = io/2; ii = ii/2; lo = lo*2; li = li*2;
  * end */
 { int ii = io;
   io = id;
   while (ih >= 1) {
    for (i = io; i < nz1; i += ii) {
     pi = p;
     f = GIND(Fx, i, 1);
     for (j = nr2; j--; f += nz) {
      *pi++ = FLTC(2.0) * *f + *(f+id) + *(f-id);
      *f -= *(f+ih) + *(f-ih);
     }
     for (l = lo-1; l < nz1; l += li) GSP;
     GSU;
    }
    id >>= 1; ih >>= 1; io >>= 1; ii >>= 1; lo <<= 1; li <<= 1;
   }
 }
 /* i = 2:2:nz1;
  * ip = i-1;
  * j = 2:nr1;
  * p(ip,:) = 2.*Fx(i,j)+Fx(i+1,j)+Fx(i-1,j);
  * Fx(i,j) = 0.;
  * p = gsp(p,cq,cr,cs,nr1,lo,ip);
  * Fx = gsu(Fx,p,nr1,i,ip); */
 {
  l = lo-1;
  for (i = 1; i < nz1; i += 2) {
   pi = p;
   f = GIND(Fx, i, 1);
   for (j = nr2; j--; f += nz) {
    *pi++ = FLTC(2.0) * *f + *(f+1) + *(f-1);
    *f = FLTC(0.0);
   }
   GSP;
   GSU;
  }
 }
 /* Add dFx/dz * dz.
  * Per column: one-sided difference times dz on the first and last row,
  * difference of the two neighbours times dz/2 on the rows in between.
  * Each increment is computed before the values it reads are updated (d0
  * holds the pending increment for the row f points to). */
 if (dz) {
  FLT hdz = FLTC(0.5) * dz, d0, d1, *f1 = Fx;
  for (j = nr; j--; ) {
   f = f1;
   d0 = (*(++f1) - *f) * dz;
   for (i = nz1; --i; ) {
    d1 = (*(++f1) - *f) * hdz;
    *f++ += d0;
    d0 = d1;
   }
   d1 = (*f1 - *f) * dz;
   *f += d0;
   *f1++ += d1;
  }
 }
}
