!=================================================================================
!
! Module read_matrix
!
! (1) read_matrix_d()           Originally by JRD       Last Modified 5/1/2008 (JRD)
!
! This program reads a distributed matrix like chimat or epsmat from file.
!
! (2) read_matrix_f()           Originally by JRD       Last Modified 9/10/2010 (gsm)
!
! Modification of read_matrix_d for full-frequency.
!
!=================================================================================

#include "f_defs.h"

module read_matrix_m

  use global_m
  use scalapack_m
  use epsread_hdf5_m
#ifdef HDF5
  use hdf5
#endif

  implicit none
  
  private

  public :: &
    read_matrix_d, &
    read_matrix_d_hdf5, &
    read_matrix_f, &
    read_matrix_f_hdf5


contains

subroutine read_matrix_d(scal,matrix,nmtx,iunit)
  ! Front end of read_matrix_d_ for Fortran binary input: every argument is
  ! forwarded unchanged and the data source is selected through iunit.
  type (scalapack), intent(in) :: scal   !< layout information, passed on as-is
  integer, intent(in) :: nmtx, iunit     !< matrix dimension and unit to read from
  SCALAR, intent(out) :: matrix(:,:)     !< (scal%npr,scal%npc)

  PUSH_SUB(read_matrix_d)

  ! Only iunit is supplied by keyword, so the worker takes its
  ! unformatted-read path rather than the HDF5 one.
  call read_matrix_d_(scal, matrix, nmtx, &
    iunit=iunit)

  POP_SUB(read_matrix_d)

end subroutine read_matrix_d


subroutine read_matrix_d_hdf5(scal,matrix,nmtx,fname,iq,is)
  ! Front end of read_matrix_d_ for HDF5 input: every argument is forwarded
  ! unchanged and the data source is selected through fname/iq/is.
  type (scalapack), intent(in) :: scal   !< layout information, passed on as-is
  integer, intent(in) :: nmtx            !< matrix dimension
  character(len=*), intent(in) :: fname  !< file name handed to the HDF5 reader
  integer, intent(in) :: iq, is          !< indices handed to the HDF5 reader
  SCALAR, intent(out) :: matrix(:,:)     !< (scal%npr,scal%npc)

  PUSH_SUB(read_matrix_d_hdf5)

  ! iunit is deliberately left out, which is what makes the worker pick
  ! its HDF5 path.
  call read_matrix_d_(scal, matrix, nmtx, &
    fname=fname, iq=iq, is=is)

  POP_SUB(read_matrix_d_hdf5)

end subroutine read_matrix_d_hdf5


subroutine read_matrix_d_(scal,matrix,nmtx,iunit,fname,iq,is)
  ! Worker shared by read_matrix_d and read_matrix_d_hdf5.
  !
  ! Reads nmtx columns of nmtx elements each, either with unformatted reads
  ! from unit iunit or, when fname and iq are supplied, through
  ! read_eps_matrix_col_hdf5. Exactly one of the two sources must be given;
  ! anything else aborts through die().
  !
  ! With USESCALAPACK, rank 0 reads a column, the column is broadcast to all
  ! ranks, and each rank keeps the elements whose (row,column) owner computed
  ! from scal%nbl, scal%nprow and scal%npcol equals (scal%myprow,scal%mypcol).
  ! Kept elements are appended to matrix in arrival order, filling it column
  ! by column with a column height of scal%npr.
  ! Without USESCALAPACK only rank 0 reads, directly into matrix(:,column).
  type (scalapack), intent(in) :: scal
  SCALAR, intent(out) :: matrix(:,:) !< (scal%npr,scal%npc)
  integer, intent(in) :: nmtx
  integer, intent(in), optional :: iunit
  character(len=*), intent(in), optional :: fname
  integer, intent(in), optional :: iq
  integer, intent(in), optional :: is

  logical :: have_unit, have_fname, have_iq
  logical :: use_hdf5
  integer :: igp
#ifdef USESCALAPACK
  SCALAR, allocatable :: tempcol(:)
  integer :: ig
  integer :: owner_row, owner_col
  integer :: loc_row, loc_col
  integer :: nstored
#endif

  PUSH_SUB(read_matrix_d_)

  have_unit = present(iunit)
  have_fname = present(fname)
  have_iq = present(iq)

  ! Neither a unit nor a complete (fname,iq) pair was supplied.
  if (.not.(have_unit .or. (have_fname .and. have_iq))) then
    call die("Not enough arguments to read_matrix_d_", only_root_writes=.true.)
  endif
  ! A unit was supplied together with part of the HDF5 description.
  if (have_unit .and. (have_fname .or. have_iq)) then
    call die("Too many arguments to read_matrix_d_", only_root_writes=.true.)
  endif
  ! Exactly one of fname / iq was supplied.
  if (have_fname .neqv. have_iq) then
    call die("Inconsistent arguments to read_matrix_d_", only_root_writes=.true.)
  endif

  use_hdf5 = have_fname .and. have_iq
#ifndef HDF5
  if (use_hdf5) then
    call die("read_matrix_d_ was not compiled with HDF5 support.", only_root_writes=.true.)
  endif
#endif

  if (peinf%verb_debug .and. peinf%inode .eq. 0) then
    if (.not. use_hdf5) then
      write(6,*) 'Reading matrix: ', nmtx, iunit
    else
      write(6,*) 'Reading matrix: ', nmtx, fname
    endif
    write(6,*)
  endif

#ifdef USESCALAPACK

  SAFE_ALLOCATE(tempcol, (nmtx))

  ! Running count of elements this rank has stored so far.
  nstored = 0

  do igp = 1, nmtx

    ! Only rank 0 touches the file; everybody else receives the column below.
    if (peinf%inode == 0) then
      if (use_hdf5) then
#ifdef HDF5
        call read_eps_matrix_col_hdf5(tempcol,igp,nmtx,iq,is,fname)
#endif
      else
        read(iunit) tempcol(1:nmtx)
      endif
    endif

    call MPI_BCAST(tempcol, nmtx, MPI_SCALAR, 0, MPI_COMM_WORLD, mpierr)

    ! Process column that owns global column igp.
    owner_col = mod(int(((igp-1)/scal%nbl)+TOL_SMALL), scal%npcol)
    if (owner_col == scal%mypcol) then
      do ig = 1, nmtx
        ! Process row that owns global row ig.
        owner_row = mod(int(((ig-1)/scal%nbl)+TOL_SMALL), scal%nprow)
        if (owner_row /= scal%myprow) cycle

        ! Translate the running counter into a (row,column) slot of matrix.
        nstored = nstored + 1
        loc_col = int((nstored-1)/scal%npr+TOL_SMALL) + 1
        loc_row = mod(nstored-1, scal%npr) + 1
        matrix(loc_row,loc_col) = tempcol(ig)
      enddo
    endif

    ! Reached by every rank on every column, owner or not.
    call MPI_barrier(MPI_COMM_WORLD,mpierr)

  enddo

  SAFE_DEALLOCATE(tempcol)

#else

  if (peinf%inode == 0) then
    do igp = 1, nmtx
      if (use_hdf5) then
#ifdef HDF5
        call read_eps_matrix_col_hdf5(matrix(:,igp),igp,nmtx,iq,is,fname)
#endif
      else
        read(iunit) matrix(1:nmtx, igp)
      endif
    enddo
  endif

#endif

  POP_SUB(read_matrix_d_)

end subroutine read_matrix_d_


!=================================================================================


!> FHJ: Front end for read_matrix_f_ for Fortran binary files. See that routine for more info.
subroutine read_matrix_f(scal, nfreq, nfreq_in_group, retarded, nmtx, nfreq_group, iunit, advanced)
  ! Forwards every argument unchanged to read_matrix_f_, selecting the
  ! Fortran-binary source through iunit.
  type(scalapack), intent(in) :: scal
  integer, intent(in) :: nfreq, nfreq_in_group
  integer, intent(in) :: nmtx, nfreq_group
  integer, intent(in) :: iunit
  ! Frequency is the LAST index: read_matrix_f_ stores into
  ! retarded(row, column, frequency).
  complex(DPC), intent(out) :: retarded(:,:,:) !< (scal%npr,scal%npc,nfreq_in_group)
  complex(DPC), intent(out), optional :: advanced(:,:,:) !< (scal%npr,scal%npc,nfreq_in_group)

  PUSH_SUB(read_matrix_f)

  ! advanced is passed straight through; if the caller omitted it, it is
  ! absent in read_matrix_f_ as well.
  call read_matrix_f_(scal, nfreq, nfreq_in_group, retarded, nmtx, nfreq_group, &
    iunit=iunit, advanced=advanced)

  POP_SUB(read_matrix_f)

end subroutine read_matrix_f


!> FHJ: Front end for read_matrix_f_ for HDF5 files. See that routine for more info.
subroutine read_matrix_f_hdf5(scal, nfreq, nfreq_in_group, retarded, nmtx, nfreq_group, fname, iq, is, advanced)
  ! Forwards every argument unchanged to read_matrix_f_, selecting the HDF5
  ! source through fname/iq/is.
  type(scalapack), intent(in) :: scal
  integer, intent(in) :: nfreq, nfreq_in_group
  integer, intent(in) :: nmtx, nfreq_group
  character(len=*), intent(in) :: fname
  integer, intent(in) :: iq, is
  ! Frequency is the LAST index: read_matrix_f_ stores into
  ! retarded(row, column, frequency).
  complex(DPC), intent(out) :: retarded(:,:,:) !< (scal%npr,scal%npc,nfreq_in_group)
  complex(DPC), intent(out), optional :: advanced(:,:,:) !< (scal%npr,scal%npc,nfreq_in_group)

  PUSH_SUB(read_matrix_f_hdf5)

  ! advanced is passed straight through; if the caller omitted it, it is
  ! absent in read_matrix_f_ as well.
  call read_matrix_f_(scal, nfreq, nfreq_in_group, retarded, nmtx, nfreq_group, &
    fname=fname, &
    iq=iq, &
    is=is, &
    advanced=advanced)

  POP_SUB(read_matrix_f_hdf5)

end subroutine read_matrix_f_hdf5


!> FHJ: This routine reads the full-frequency chiR/epsR matrix from a file, and
!! optionally chiA/epsA (note: you shouldn`t really need chiA, ever...)
!! If using HDF5, we only read the retarded part. If legacy
!! Fortran binary, we read the retarded and skip the advanced. The final
!! matrix will be distributed in a ScaLAPACK layout given by scal. Note that
!! this routine is pretty inefficient, but this is not a core component
!! of BGW as it`s only used if you read_chi or use the eps*omega utility.
subroutine read_matrix_f_(scal, nfreq, nfreq_in_group, retarded, nmtx, &
  nfreq_group, iunit, fname, iq, is, advanced)
  ! Worker shared by read_matrix_f and read_matrix_f_hdf5.
  !
  ! Exactly one data source must be given: either iunit (unformatted Fortran
  ! file) or fname together with iq (HDF5 file); anything else aborts through
  ! die(). For every matrix column the routine obtains all nmtx rows at all
  ! nfreq frequencies, and stores them with frequency as the last index.
  !
  ! With USESCALAPACK, rank 0 reads, the column is broadcast, and each rank
  ! keeps the elements whose (row,column) owner computed from scal%nbl,
  ! scal%nprow and scal%npcol equals (scal%myprow,scal%mypcol), restricted to
  ! the frequencies ifreq with mod(ifreq-1,nfreq_group) == peinf%igroup_f.
  ! Without USESCALAPACK only rank 0 reads, directly into the output arrays,
  ! and all nfreq frequencies are kept.
  !
  ! The advanced matrix is only ever filled when the code is built with CPLX
  ! and the caller passes advanced.
  type(scalapack), intent(in) :: scal
  integer, intent(in) :: nfreq !< number of frequencies read per matrix element
  integer, intent(in) :: nfreq_in_group !< not referenced in the body; only names the 3rd extent below
  complex(DPC), intent(out) :: retarded(:,:,:) !< (scal%npr,scal%npc,nfreq_in_group)
  integer, intent(in) :: nmtx !< number of rows and of columns read
  integer, intent(in) :: nfreq_group !< number of frequency groups the nfreq frequencies are dealt over
  integer, intent(in), optional :: iunit !< unit of the unformatted file
  character(len=*), intent(in), optional :: fname !< HDF5 file name, forwarded to the HDF5 reader
  integer, intent(in), optional :: iq !< forwarded to the HDF5 reader
  integer, intent(in), optional :: is !< forwarded to the HDF5 reader
  complex(DPC), intent(out), optional :: advanced(:,:,:) !< (scal%npr,scal%npc,nfreq_in_group)

  integer :: ii, jj, ifreq
#ifdef USESCALAPACK
  complex(DPC), allocatable :: tempcolR(:,:)
  complex(DPC), allocatable :: tempcolA(:,:)
  integer :: irow, icol, irowm, icolm
  integer :: icurr
  integer :: ifreq_para, freq_grp_ind
#endif
  logical :: use_hdf5, want_advanced

  PUSH_SUB(read_matrix_f_)

  ! Without CPLX the advanced matrix is never read, whatever the caller passed.
  want_advanced = .false.
#ifdef CPLX
  want_advanced = present(advanced)
#endif
  if (.not.present(iunit).and..not.(present(fname).and.present(iq))) then
    call die("Not enough arguments to read_matrix_f_", only_root_writes=.true.)
  endif
  if (present(iunit).and.(present(fname).or.present(iq))) then
    call die("Too many arguments to read_matrix_f_", only_root_writes=.true.)
  endif
  if ((present(fname).or.present(iq)).and..not.(present(fname).and.present(iq))) then
    call die("Inconsistent arguments to read_matrix_f_", only_root_writes=.true.)
  endif
  use_hdf5 = present(fname).and.present(iq)
#ifndef HDF5
  if (use_hdf5) then
    call die("read_matrix_f_ was not compiled with HDF5 support.", only_root_writes=.true.)
  endif
#endif

  if (peinf%verb_debug .and. peinf%inode==0) then
    if (use_hdf5) then
      write(6,*) ' Reading matrix: ', nmtx, fname
    else
      write(6,*) ' Reading matrix: ', nmtx, iunit
    endif
    write(6,*)
  endif

#ifdef USESCALAPACK
  ! One full column (all rows, all frequencies) is staged per iteration.
  SAFE_ALLOCATE(tempcolR, (nmtx,nfreq))
  if (want_advanced) then
    SAFE_ALLOCATE(tempcolA, (nmtx,nfreq))
  endif

  ! Running count of (row,column) elements this rank has stored so far.
  icurr=0

  ! FHJ: FIXME: this code makes *very* little sense now that freq. is a slow index.
  ! We are reading all freqs and all igs for a given igp, but memory layout is
  ! (ig,igp,iw)
  do jj = 1, nmtx

    ! Only rank 0 touches the file; everybody else receives the column below.
    if (peinf%inode .eq. 0) then
      if (use_hdf5) then
#ifdef HDF5
        ! Column jj at all frequencies; the advanced part is requested from
        ! the HDF5 reader only when the caller asked for it.
        if (want_advanced) then
          call read_eps_matrix_col_f_hdf5(tempcolR, nfreq, jj, nmtx, iq, is, fname, advanced=tempcolA)
        else
          call read_eps_matrix_col_f_hdf5(tempcolR, nfreq, jj, nmtx, iq, is, fname)
        endif
#endif
      else
        ! One record per row, holding the nfreq retarded values.
        do ii = 1, nmtx
          read(iunit) (tempcolR(ii,ifreq),ifreq=1,nfreq)
        enddo
#ifdef CPLX
        ! The nmtx advanced records follow; read them or skip over them.
        if (want_advanced) then
          do ii = 1, nmtx
            read(iunit) (tempcolA(ii,ifreq),ifreq=1,nfreq)
          enddo
        else
          do ii = 1, nmtx
            read(iunit)
          enddo
        endif
#endif
      endif
    endif

    call MPI_BCAST(tempcolR, nfreq*nmtx, MPI_COMPLEX_DPC, 0, MPI_COMM_WORLD, mpierr)
    if (want_advanced) then
      call MPI_BCAST(tempcolA, nfreq*nmtx, MPI_COMPLEX_DPC, 0, MPI_COMM_WORLD, mpierr)
    endif

    ! Process column that owns global column jj.
    icol=MOD(INT(((jj-1)/scal%nbl)+TOL_SMALL),scal%npcol)
    if (icol .eq. scal%mypcol) then
      do ii = 1, nmtx
        ! Process row that owns global row ii.
        irow=MOD(INT(((ii-1)/scal%nbl)+TOL_SMALL),scal%nprow)
        if (irow .eq. scal%myprow) then
          ! Translate the running counter into a local (row,column) slot,
          ! filling column by column with a column height of scal%npr.
          icurr=icurr+1
          icolm=INT((icurr-1)/scal%npr+TOL_SMALL)+1
          irowm=MOD((icurr-1),scal%npr)+1
          do ifreq=1,nfreq
            ! Frequencies are dealt round-robin over the nfreq_group groups;
            ! ifreq_para is the position of ifreq inside its group.
            freq_grp_ind=mod(ifreq-1,nfreq_group)
            ifreq_para=(ifreq+nfreq_group-1)/nfreq_group
            if (freq_grp_ind .eq. peinf%igroup_f) then
              retarded(irowm,icolm,ifreq_para)=tempcolR(ii,ifreq)
              if (want_advanced) then
                advanced(irowm,icolm,ifreq_para)=tempcolA(ii,ifreq)
              endif
            endif
          enddo
        endif
      enddo
    endif

    ! Reached by every rank on every column, owner or not.
    call MPI_barrier(MPI_COMM_WORLD,mpierr)
  enddo

  SAFE_DEALLOCATE(tempcolR)
  if (want_advanced) then
    SAFE_DEALLOCATE(tempcolA)
  endif

#else

  if(peinf%inode .eq. 0) then
    do jj = 1, nmtx
      if (use_hdf5) then
#ifdef HDF5
        ! Both outputs must be sliced the same way: column jj, all rows, all
        ! frequencies. (advanced was previously sliced as (:,:,jj), i.e. by
        ! frequency index jj, which has the wrong shape and the wrong target.)
        if (want_advanced) then
          call read_eps_matrix_col_f_hdf5(retarded(:,jj,:), nfreq, jj, nmtx, iq, is, fname, &
            advanced=advanced(:,jj,:))
        else
          call read_eps_matrix_col_f_hdf5(retarded(:,jj,:), nfreq, jj, nmtx, iq, is, fname)
        endif
#endif
      else
        ! One record per row, holding the nfreq retarded values.
        do ii = 1, nmtx
          read(iunit) (retarded(ii, jj, ifreq), ifreq = 1, nfreq)
        enddo
#ifdef CPLX
        ! The nmtx advanced records follow; read them or skip over them.
        if (want_advanced) then
          do ii = 1, nmtx
            read(iunit) (advanced(ii, jj, ifreq), ifreq = 1, nfreq)
          enddo
        else
          do ii = 1, nmtx
            read(iunit)
          enddo
        endif
#endif
      endif
    enddo
  endif

#endif

  POP_SUB(read_matrix_f_)

  return
end subroutine read_matrix_f_

end module read_matrix_m
