! ==============================================================================
! Subroutine: EWALD_PM (ECOUL,FTOT)
! 
! Purpose: Computes the reciprocal portion of the Ewald sum for electrostatic
!          energy, forces, and virial tensor using the smooth particle
!          mesh Ewald method.
!
! Method: Essmann et al. "A smooth particle mesh Ewald method".
!         J. Chem. Phys., 8577 (1995)
!
! Arguments:
!
!           EREC - Porition of the Coulomb energy comming from the
!                  reciprocial Ewald sum.
!           FTOT - Array of dimension (3,NAT) containing the force
!                  force on each atom.
!           VIR  - Array of dimension (3,3) containing the virial
!                  tensor with elements given by:
!                  S_ab = SUM_i Fia * Rib where Fia = ath
!                  component of the force on atom i.
!
! Special Notes:
!
!           CMAT - Real array of dimension K1 X K2 X K3. Computed by the
!                  subroutine PMESETUP.
!           MREC - Array of length 3 containing the number of FFT
!                  elements. These are the dimensions of the matrix
!                  CMAT.
!           NORD - Order of the Cardinal B-Spline interpolation.
!
! History:
!
! Version    Date         Comment
! --------   ----------   -----------------------
!            10/01/2010   Original Code
!
! Dependancies:
!
! Modules - SystemParam
! Functions -
! Subroutines - BSPLINE
!
! Author(s): Eric Dykeman
!
! ==============================================================================

      SUBROUTINE EWALD_PM (EREC,FTOT)

        USE SystemParam, ONLY : rat,q,vir,rlv,beta,mrec,nat,nord,&
                              & icou,ipbc,cmat

        IMPLICIT NONE

        !=== ARGUMENTS ===!

        DOUBLE PRECISION, INTENT(OUT) :: erec
        DOUBLE PRECISION, INTENT(INOUT) :: ftot(3,nat)

        !=== VARIABLES ===!

        INTEGER, DIMENSION(:), ALLOCATABLE :: iwrk
        DOUBLE COMPLEX, DIMENSION(:), ALLOCATABLE :: cwrk
        DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: rwrk

        INTEGER :: i,j,k,m1,m2,m3,k1,k2,k3,ktot
        INTEGER :: mp1,mp2,mp3,kx1,kx2,kx3,n2,n3
        INTEGER :: mx1(nat),mx2(nat),mx3(nat)

        DOUBLE COMPLEX :: qmat(mrec(1),mrec(2),mrec(3))
        DOUBLE COMPLEX :: qstr,cmpx

        DOUBLE PRECISION :: x1,x2,x3,g1,g2,g3,gsq,arg
        DOUBLE PRECISION :: s11,s12,s13,s22,s23,s33,c,c1
        DOUBLE PRECISION :: bsp1(nord,nat),dbsp1(nord,nat)
        DOUBLE PRECISION :: bsp2(nord,nat),dbsp2(nord,nat)
        DOUBLE PRECISION :: bsp3(nord,nat),dbsp3(nord,nat)

        DOUBLE PRECISION, PARAMETER :: pi = 3.14159265358979323846d0


        erec = 0.0d0

        IF ( icou == 0 ) RETURN


        !=== Constants ===!

        c1 = pi / beta
        c1 = c1 * c1

        k1 = mrec(1)
        k2 = mrec(2)
        k3 = mrec(3)

        kx1 = k1 / 2
        kx2 = k2 / 2
        kx3 = k3 / 2

        ktot = k1 * k2 * k3

        n2 = nord * nord
        n3 = nord * n2

        i = 6 * MAX(k1,k2,k3) + 150
        ALLOCATE (iwrk(i),rwrk(i))

        i = MAX(k2,k3)
        ALLOCATE (cwrk(i))


        !=== Reciprocial Sum ===!

        !=== Get Q matrix for FFT ===!

        qmat = (0.0d0,0.0d0)

        DO i=1,nat

          x1 = rlv(1,1) * rat(1,i) + rlv(1,2) * rat(2,i) &
           & + rlv(1,3) * rat(3,i)
          x2 = rlv(2,1) * rat(1,i) + rlv(2,2) * rat(2,i) &
           & + rlv(2,3) * rat(3,i)
          x3 = rlv(3,1) * rat(1,i) + rlv(3,2) * rat(2,i) &
           & + rlv(3,3) * rat(3,i)

          m1 = INT(x1)
          m2 = INT(x2)
          m3 = INT(x3)

          IF ( x1 < 0.0d0 ) m1 = m1 - 1
          IF ( x2 < 0.0d0 ) m2 = m2 - 1
          IF ( x3 < 0.0d0 ) m3 = m3 - 1

          x1 = ( x1 - DBLE(m1) ) * DBLE(k1)
          x2 = ( x2 - DBLE(m2) ) * DBLE(k2)
          x3 = ( x3 - DBLE(m3) ) * DBLE(k3)

          mx1(i) = INT(x1)
          mx2(i) = INT(x2)
          mx3(i) = INT(x3)

          x1 = x1 - DBLE(mx1(i))
          x2 = x2 - DBLE(mx2(i))
          x3 = x3 - DBLE(mx3(i))

          CALL BSPLINE (x1,bsp1(:,i),dbsp1(:,i),nord)
          CALL BSPLINE (x2,bsp2(:,i),dbsp2(:,i),nord)
          CALL BSPLINE (x3,bsp3(:,i),dbsp3(:,i),nord)

          DO j=1,n3

            k = j - 1

            m3 = k / n2
            k = k - m3 * n2

            m2 = k / nord
            m1 = k - m2 * nord

            mp1 = mx1(i) - m1 + 1
            mp2 = mx2(i) - m2 + 1
            mp3 = mx3(i) - m3 + 1

            k = 1 + IABS(mp1) / k1
            IF ( mp1 <= 0 ) mp1 = mp1 + k * k1

            k = 1 + IABS(mp2) / k2
            IF ( mp2 <= 0 ) mp2 = mp2 + k * k2

            k = 1 + IABS(mp3) / k3
            IF ( mp3 <= 0 ) mp3 = mp3 + k * k3

            c = q(i) * bsp1(m1+1,i)
            c = c * bsp2(m2+1,i)
            c = c * bsp3(m3+1,i)

            cmpx = DCMPLX(c,0.0d0)
            qmat(mp1,mp2,mp3) = qmat(mp1,mp2,mp3) + cmpx

          ENDDO

        ENDDO


        !=== Reciprocal Energy and Virial ===!

        CALL FFT3D (qmat,k1,k2,k1,k2,k3,-1,iwrk,rwrk,cwrk)

        DO i=1,ktot

          j = i - 1
          k = k1 * k2

          m3 = j / k
          j = j - m3 * k

          m2 = j / k1
          m1 = j - m2 * k1

          mp1 = m1 + 1
          mp2 = m2 + 1
          mp3 = m3 + 1

          IF ( m1 > kx1 ) m1 = m1 - k1
          IF ( m2 > kx2 ) m2 = m2 - k2
          IF ( m3 > kx3 ) m3 = m3 - k3

          k = IABS(m1) + IABS(m2) + IABS(m3)

          c = DBLE(ktot) * cmat(mp1,mp2,mp3)

          cmpx = DCMPLX(c,0.0d0)
          qstr = DCONJG(qmat(mp1,mp2,mp3))

          qmat(mp1,mp2,mp3) = cmpx * qmat(mp1,mp2,mp3)

          arg = qstr * qmat(mp1,mp2,mp3)
          arg = DBLE(ktot) * arg

          erec = erec + arg

          IF ( ipbc /= 2 .or. k == 0 ) CYCLE

          g1 = DBLE(m1) * rlv(1,1) + DBLE(m2) * rlv(2,1) &
           & + DBLE(m3) * rlv(3,1)
          g2 = DBLE(m1) * rlv(1,2) + DBLE(m2) * rlv(2,2) &
           & + DBLE(m3) * rlv(3,2)
          g3 = DBLE(m1) * rlv(1,3) + DBLE(m2) * rlv(2,3) &
           & + DBLE(m3) * rlv(3,3)

          gsq = g1 * g1 + g2 * g2 + g3 * g3

          arg = arg * ( 1.0d0 / gsq + c1 )

          x1 = arg * g1
          x2 = arg * g2
          x3 = arg * g3

          s11 = x1 * g1
          s12 = x1 * g2
          s13 = x1 * g3
          s22 = x2 * g2
          s23 = x2 * g3
          s33 = x3 * g3

          vir(1,1) = vir(1,1) - s11
          vir(2,1) = vir(2,1) - s12
          vir(3,1) = vir(3,1) - s13
          vir(1,2) = vir(1,2) - s12
          vir(2,2) = vir(2,2) - s22
          vir(3,2) = vir(3,2) - s23
          vir(1,3) = vir(1,3) - s13
          vir(2,3) = vir(2,3) - s23
          vir(3,3) = vir(3,3) - s33

        ENDDO

        erec = 0.50d0 * erec

        IF ( ipbc == 2 ) THEN

          vir(1,1) = vir(1,1) + erec
          vir(2,2) = vir(2,2) + erec
          vir(3,3) = vir(3,3) + erec

        ENDIF


        !=== Reciprocal Forces ===!

        CALL FFT3D (qmat,k1,k2,k1,k2,k3,1,iwrk,rwrk,cwrk)

        DO i=1,nat

          g1 = DBLE(k1) * q(i)
          g2 = DBLE(k2) * q(i)
          g3 = DBLE(k3) * q(i)

          DO j=1,n3

            k = j - 1

            m3 = k / n2
            k = k - m3 * n2

            m2 = k / nord
            m1 = k - m2 * nord

            mp1 = mx1(i) - m1 + 1
            mp2 = mx2(i) - m2 + 1
            mp3 = mx3(i) - m3 + 1

            k = 1 + IABS(mp1) / k1
            IF ( mp1 <= 0 ) mp1 = mp1 + k * k1

            k = 1 + IABS(mp2) / k2
            IF ( mp2 <= 0 ) mp2 = mp2 + k * k2

            k = 1 + IABS(mp3) / k3
            IF ( mp3 <= 0 ) mp3 = mp3 + k * k3

            c = DBLE(qmat(mp1,mp2,mp3))

            x1 = g1 * dbsp1(m1+1,i)
            x2 = g2 * dbsp2(m2+1,i)
            x3 = g3 * dbsp3(m3+1,i)

            x1 = x1 * bsp2(m2+1,i)
            x2 = x2 * bsp1(m1+1,i)
            x3 = x3 * bsp1(m1+1,i)

            x1 = x1 * bsp3(m3+1,i)
            x2 = x2 * bsp3(m3+1,i)
            x3 = x3 * bsp2(m2+1,i)

            arg = x1 * rlv(1,1) + x2 * rlv(2,1) + x3 * rlv(3,1)
            ftot(1,i) = ftot(1,i) - c * arg

            arg = x1 * rlv(1,2) + x2 * rlv(2,2) + x3 * rlv(3,2)
            ftot(2,i) = ftot(2,i) - c * arg

            arg = x1 * rlv(1,3) + x2 * rlv(2,3) + x3 * rlv(3,3)
            ftot(3,i) = ftot(3,i) - c * arg

          ENDDO

        ENDDO


        !=== Deallocate Work Arrays ===!

        DEALLOCATE (iwrk,cwrk,rwrk)

        RETURN

      END SUBROUTINE EWALD_PM
