! ==============================================================================
! Subroutine: PMESETUP
! 
! Purpose: Sets up the constant real K1 X K2 X K3 matrix required for the
!          convolution operation in the particle mesh Ewald method. Generally
!          this subroutine only needs to be called once UNLESS the primitive
!          reciprocal lattice vectors change.
!
! Method: Essmann et al. "A smooth particle mesh Ewald method".
!         J. Chem. Phys., 8577 (1995)
!
! Arguments:
!
!           CMAT - Real array of dimension K1 X K2 X K3. On OUTPUT,
!                  contains the array given by the formula:
!
!                  C(m1,m2,m3) = |A1(m1)|^2 * |A2(m2)|^2 * |A3(m3)|^2 *
!                          (1/PI*V) * EXP( -PI^2*G^2 / 4*BETA^2 ) / G^2
!                  where
!
!                  INV[Ai(mi)] = 
!                  SUM_{k=1,n-1} Mn(k) * EXP (2 PI i*mi*[k-n] / Ki )
!
!                  and Mn(u) is a Cardinal B-Spline of order n
!
!           MREC - Array of length 3 containing the number of FFT
!                  elements. These are the dimensions of the matrix CMAT.
!           NORD - Order of the Cardinal B-Spline interpolation.
!
! History:
!
! Version    Date         Comment
! --------   ----------   -----------------------
!            10/01/2010   Original Code
!
! Dependancies:
!
! Modules - SystemParam
! Functions -
! Subroutines - CLOSEMPI
!
! Author(s): Eric Dykeman
!
! ==============================================================================

      SUBROUTINE PMESETUP

        USE SystemParam, ONLY : cmat,tole,beta,vol,rlv,mrec,&
                              & nord,myproc,mpijob

        IMPLICIT NONE

        !=== ARGUMENTS ==!

        !=== VARIABLES ===!

        INTEGER :: i,j,k,k1,k2,k3,m1,m2,m3
        INTEGER :: kx1,kx2,kx3,mp1,mp2,mp3
        INTEGER :: ktot,i2(11),i3(7),i5(5)

        DOUBLE PRECISION :: x1,x2,x3,g1,g2,g3,c1,c2,c3
        DOUBLE PRECISION :: ar,ai,br,bi,cr,ci,arg,gsq
        DOUBLE PRECISION :: glo,ghi,gm,gcut,bsp(nord)

        INTEGER, PARAMETER :: kmax = 1024
        DOUBLE PRECISION, PARAMETER :: pi = 3.14159265358979323846d0

        DATA (i5(i),i=1,5) / 1,5,25,125,625 /
        DATA (i3(i),i=1,7) / 1,3,9,27,81,243,729 /
        DATA (i2(i),i=1,11)/ 1,2,4,8,16,32,64,128,256,512,1024 /


        IF ( ALLOCATED (cmat) ) DEALLOCATE(cmat)

        !=== Constants ===!

        c1 = 2.0d0 * pi

        c2 = pi / beta
        c2 = c2 * c2

        c3 = vol * pi
        c3 = 1.0d0 / c3

        !=== Find optimal number of FFT points ===!

        !=== Bracket Solution ===!

        i = 1
        ghi = 1.0d0
        arg = ghi * c2
        arg = DEXP(-arg) / ghi

        DO WHILE ( arg > tole )

          i = i + 1
          ghi = 2.0d0 * ghi
          arg = ghi * c2
          arg = DEXP(-arg) / ghi

        ENDDO

        !=== Find Squared Reciprocal Cutoff ===!

        j = i + 50
        glo = 0.0d0

        DO i=1,j

          gm = 0.50d0 * ( glo + ghi )
          arg = gm * c2
          arg = DEXP(-arg) / gm

          IF ( arg > tole ) THEN

            glo = gm

          ELSE

            ghi = gm

          ENDIF

        ENDDO

        gcut = DSQRT(gm)

        !=== Compute Lengths of Rec Vectors ===!

        g1 = rlv(1,1) * rlv(1,1) + rlv(1,2) * rlv(1,2) &
         & + rlv(1,3) * rlv(1,3)
        g2 = rlv(2,1) * rlv(2,1) + rlv(2,2) * rlv(2,2) &
         & + rlv(2,3) * rlv(2,3)
        g3 = rlv(3,1) * rlv(3,1) + rlv(3,2) * rlv(3,2) &
         & + rlv(3,3) * rlv(3,3)

        g1 = DSQRT(g1)
        g2 = DSQRT(g2)
        g3 = DSQRT(g3)

        arg = gcut * vol

        gm = arg * g2 * g3
        m1 = 2 * INT(gm) + 1

        gm = arg * g1 * g3
        m2 = 2 * INT(gm) + 1

        gm = arg * g1 * g2
        m3 = 2 * INT(gm) + 1

        !=== Get Optimal Size of FFT ===!

        i = 0
        IF ( m1 > kmax ) i = 1
        IF ( m2 > kmax ) i = 1
        IF ( m3 > kmax ) i = 1

        IF ( i == 1 ) THEN

          IF ( myproc == 0 ) THEN
          WRITE(4,*)'ERROR: Maximum number of PM grid points in each'
          WRITE(4,*)'direction is kmax = ',kmax
          WRITE(4,*)'Requested number of grid points:'
          WRITE(4,*)'kx, ky, kz = ',m1,m2,m3
          ENDIF

          IF ( mpijob ) CALL CLOSEMPI

          STOP

        ENDIF

        k1 = kmax
        k2 = kmax
        k3 = kmax

        DO i=1,385

          j = i - 1

          mp3 = j / 77
          j = j - mp3 * 77

          mp2 = j / 11
          mp1 = j - mp2 * 11

          k = i2(mp1+1) * i3(mp2+1) * i5(mp3+1)

          k = MIN(k,kmax)

          IF ( k >= m1 ) k1 = MIN(k,k1)
          IF ( k >= m2 ) k2 = MIN(k,k2)
          IF ( k >= m3 ) k3 = MIN(k,k3)

        ENDDO

        mrec(1) = k1
        mrec(2) = k2
        mrec(3) = k3

        kx1 = k1 / 2
        kx2 = k2 / 2
        kx3 = k3 / 2

        ktot = k1 * k2 * k3

        ALLOCATE (cmat(k1,k2,k3))


        !=== Cardinal Spline Points ===!

        bsp = 0.0d0

        i = 2
        bsp(1) = 1.0d0

        DO WHILE ( i < nord )

          i = i + 1

          x1 = DBLE(i)
          x2 = 0.0d0

          DO j=i,2,-1

            bsp(j) = x1 * bsp(j) + x2 * bsp(j-1)

            bsp(j) = bsp(j) / DBLE(i-1)

            x1 = x1 - 1.0d0
            x2 = x2 + 1.0d0

          ENDDO

          bsp(1) = bsp(1) / DBLE(i-1)

        ENDDO


        !=== Form PME Matrix ===!

        DO i=1,ktot

          j = i - 1
          k = k1 * k2

          m3 = j / k
          j = j - m3 * k

          m2 = j / k1
          m1 = j - m2 * k1

          mp1 = m1 + 1
          mp2 = m2 + 1
          mp3 = m3 + 1

          IF ( m1 > kx1 ) m1 = m1 - k1
          IF ( m2 > kx2 ) m2 = m2 - k2
          IF ( m3 > kx3 ) m3 = m3 - k3

          cmat(mp1,mp2,mp3) = 0.0d0

          j = IABS(m1) + IABS(m2) + IABS(m3)

          IF ( j == 0 ) CYCLE

          IF ( MODULO(nord,2) == 1 ) THEN

            IF ( m1 == kx1 ) CYCLE
            IF ( m2 == kx2 ) CYCLE
            IF ( m3 == kx3 ) CYCLE

          ENDIF

          !=== Get A1 A2 and A3 ===!

          x1 = c1 * DBLE(m1) / DBLE(k1)
          x2 = c1 * DBLE(m2) / DBLE(k2)
          x3 = c1 * DBLE(m3) / DBLE(k3)

          j = 1 - nord

          g1 = DBLE(j) * x1
          g2 = DBLE(j) * x2
          g3 = DBLE(j) * x3

          ar = 0.0d0
          ai = 0.0d0
          br = 0.0d0
          bi = 0.0d0
          cr = 0.0d0
          ci = 0.0d0

          DO j=1,nord-1

            ar = ar + bsp(j) * DCOS(g1)
            ai = ai + bsp(j) * DSIN(g1)

            br = br + bsp(j) * DCOS(g2)
            bi = bi + bsp(j) * DSIN(g2)

            cr = cr + bsp(j) * DCOS(g3)
            ci = ci + bsp(j) * DSIN(g3)

            g1 = g1 + x1
            g2 = g2 + x2
            g3 = g3 + x3

          ENDDO

          x1 = ar * ar + ai * ai
          x2 = br * br + bi * bi
          x3 = cr * cr + ci * ci

          x1 = 1.0d0 / x1
          x2 = 1.0d0 / x2
          x3 = 1.0d0 / x3

          g1 = DBLE(m1) * rlv(1,1) + DBLE(m2) * rlv(2,1) &
           & + DBLE(m3) * rlv(3,1)
          g2 = DBLE(m1) * rlv(1,2) + DBLE(m2) * rlv(2,2) &
           & + DBLE(m3) * rlv(3,2)
          g3 = DBLE(m1) * rlv(1,3) + DBLE(m2) * rlv(2,3) &
           & + DBLE(m3) * rlv(3,3)

          gsq = g1 * g1 + g2 * g2 + g3 * g3

          arg = c2 * gsq

          arg = c3 * DEXP(-arg) / gsq

          arg = x1 * x2 * x3 * arg

          cmat(mp1,mp2,mp3) = arg

        ENDDO

        RETURN

      END SUBROUTINE PMESETUP
