During the realization of the course work I have to write MPI program to solve PDE continuum mechanics. (FORTRAN)
In parallel program a big computational 3D domain (grid MxNxL) is shared between processes. Every process has it's own subdomain for computation(grid NXxNYxNZ = (M/P1)x(N/P2)x(L/P3) , P1*P2*P3 = P - number of processes). After every computation step I have to exchange two border layers between neighbor processes or (it is not important) in case if I don't have a neighbor towards I have to process the border conditions. I have wrote the following implementation, but I dont know how to get rid of extra data shuffling. I want to accelerate my code as possible.
if (XB2 /= MPI_PROC_NULL) then ! If process has right-X neighbor process
call CopyToSb(NX-1,NX,1,NY,1,NZ,XPkgSize)
else
call CopyXLevel(NX,NX+1,-1) ! BORDER PROCESSING (NOT IMPORTANT)
end if
call MPI_SENDRECV_REPLACE(BPtmp, 7*XPkgSize, MPI_REAL4, XB2, 1, XB1, 1, COMM_CART, MPI_STATUS_IGNORE, ierr)
if (XB1 /= MPI_PROC_NULL) then ! If process has left-X neighbor process
call CopyFromSb(-1,0,1,NY,1,NZ,XPkg,XPkgSize)
call CopyToSb(1,2,1,NY,1,NZ,XPkgSize)
else
call CopyXLevel(1,0,1) ! BORDER PROCESSING (NOT IMPORTANT)
end if
call MPI_SENDRECV_REPLACE(BPtmp, 7*XPkgSize, MPI_REAL4, XB1, 2, XB2, 2, COMM_CART, MPI_STATUS_IGNORE, ierr)
if (XB2 /= MPI_PROC_NULL) call CopyFromSb(NX+1,NX+2,1,NY,1,NZ,XPkg,XPkgSize) ! If process has right-X neighbor process
if(iam /= YB1) then ! Because we have cyclic grid on Y direction
if (YB2 /= MPI_PROC_NULL) call CopyToSb(1,NX,NY-1,NY,1,NZ,YPkgSize) ! If process has right-Y neighbor process
call MPI_SENDRECV_REPLACE(BPtmp, 7*YPkgSize, MPI_REAL4, YB2, 3, YB1, 3, COMM_CART, MPI_STATUS_IGNORE, ierr)
if (YB1 /= MPI_PROC_NULL) then ! If process has left-Y neighbor process
call CopyFromSb(1,NX,-1,0,1,NZ,YPkg,YPkgSize)
call CopyToSb(1,NX,1,2,1,NZ,YPkgSize)
end if
call MPI_SENDRECV_REPLACE(BPtmp, 7*YPkgSize, MPI_REAL4, YB1, 4, YB2, 4, COMM_CART, MPI_STATUS_IGNORE, ierr)
if (YB2 /= MPI_PROC_NULL) call CopyFromSb(1,NX,NY+1,NY+2,1,NZ,YPkg,YPkgSize) ! If process has right-Y neighbor process
else
call CopyYLevel(1,NY+1,1) ! BORDER PROCESSING (NOT IMPORTANT)
call CopyYLevel(NY,0,1) ! BORDER PROCESSING (NOT IMPORTANT)
call CopyYLevel(2,NY+2,1) ! BORDER PROCESSING (NOT IMPORTANT)
call CopyYLevel(NY-1,-1,1) ! BORDER PROCESSING (NOT IMPORTANT)
end if
if (ZB2 /= MPI_PROC_NULL) then
call CopyToSb(1,NX,1,NY,NZ-1,NZ,ZPkgSize)
else
call CopyZLevel(NZ,NZ+1,-1) ! BORDER PROCESSING (NOT IMPORTANT)
end if
call MPI_SENDRECV_REPLACE(BPtmp, 7*ZPkgSize, MPI_REAL4, ZB2, 5, ZB1, 5, COMM_CART, MPI_STATUS_IGNORE, ierr)
if (ZB1 /= MPI_PROC_NULL) then
call CopyFromSb(1,NX,1,NY,-1,0,ZPkg,ZPkgSize)
call CopyToSb(1,NX,1,NY,1,2,ZPkgSize)
else
call CopyZLevel(1,0,-1) ! BORDER PROCESSING (NOT IMPORTANT)
end if
call MPI_SENDRECV_REPLACE(BPtmp, 7*ZPkgSize, MPI_REAL4, ZB1, 6, ZB2, 6, COMM_CART, MPI_STATUS_IGNORE, ierr)
if (ZB2 /= MPI_PROC_NULL) call CopyFromSb(1,NX,1,NY,NZ+1,NZ+2,ZPkg,ZPkgSize)
contains
! This is copyToSendBuffer and copyFromSendBuffer functions
! I think it is most problem part of my code.
subroutine CopyToSb(x1,x2,y1,y2,z1,z2,PkgSize)
integer :: x1,x2,y1,y2,z1,z2,PkgSize
BPtmp(1:PkgSize) = RESHAPE(R(x1:x2,y1:y2,z1:z2),[PkgSize])
BPtmp(PkgSize+1:2*PkgSize) = RESHAPE(U(x1:x2,y1:y2,z1:z2),[PkgSize])
BPtmp(2*PkgSize+1:3*PkgSize) = RESHAPE(V(x1:x2,y1:y2,z1:z2),[PkgSize])
BPtmp(3*PkgSize+1:4*PkgSize) = RESHAPE(W(x1:x2,y1:y2,z1:z2),[PkgSize])
BPtmp(4*PkgSize+1:5*PkgSize) = RESHAPE(P(x1:x2,y1:y2,z1:z2),[PkgSize])
BPtmp(5*PkgSize+1:6*PkgSize) = RESHAPE(H(x1:x2,y1:y2,z1:z2),[PkgSize])
BPtmp(6*PkgSize+1:7*PkgSize) = RESHAPE(S(x1:x2,y1:y2,z1:z2),[PkgSize])
end subroutine CopyToSb
subroutine CopyFromSb(x1,x2,y1,y2,z1,z2,Pkg,PkgSize)
integer, dimension(3) :: Pkg
integer :: x1,x2,y1,y2,z1,z2,PkgSize
R(x1:x2,y1:y2,z1:z2) = RESHAPE(BPtmp(1:PkgSize),Pkg)
U(x1:x2,y1:y2,z1:z2) = RESHAPE(BPtmp(PkgSize+1:2*PkgSize),Pkg)
V(x1:x2,y1:y2,z1:z2) = RESHAPE(BPtmp(2*PkgSize+1:3*PkgSize),Pkg)
W(x1:x2,y1:y2,z1:z2) = RESHAPE(BPtmp(3*PkgSize+1:4*PkgSize),Pkg)
P(x1:x2,y1:y2,z1:z2) = RESHAPE(BPtmp(4*PkgSize+1:5*PkgSize),Pkg)
H(x1:x2,y1:y2,z1:z2) = RESHAPE(BPtmp(5*PkgSize+1:6*PkgSize),Pkg)
S(x1:x2,y1:y2,z1:z2) = RESHAPE(BPtmp(6*PkgSize+1:7*PkgSize),Pkg)
end subroutine CopyFromSb
end
The problem is that I have to excahnge slices of array like 2*NY*NZ or NX*2*NZ or NX*NY*2 but in fact I have (-1:NX+2, -1:NY+2, -1:NZ+2) arrays. It is noncontiguous problem. I know what MPI derived datatypes is, but I need some advices(examples) on my way.