!===============================================================================
! Copyright 2020-2022 Intel Corporation.
!
! This software and the related documents are Intel copyrighted  materials,  and
! your use of  them is  governed by the  express license  under which  they were
! provided to you (License).  Unless the License provides otherwise, you may not
! use, modify, copy, publish, distribute,  disclose or transmit this software or
! the related documents without Intel's prior written permission.
!
! This software and the related documents  are provided as  is,  with no express
! or implied  warranties,  other  than those  that are  expressly stated  in the
! License.
!===============================================================================

!  Content:
!      Intel(R) oneAPI Math Kernel Library (oneMKL)
!      FORTRAN OpenMP offload examples for DOMATCOPY_BATCH_STRIDED
!*******************************************************************************

include "mkl_omp_offload.f90"
include "common_blas.f90"

!> Example driver for mkl_domatcopy_batch_strided with OpenMP offload.
!>
!> The same batched, scaled out-of-place copy is executed twice on identically
!> initialized data: once by a plain call (reference result in b_ref) and once
!> under "!$omp dispatch" inside a target data region (result in b).  The two
!> outputs are compared with dcheck_matrix; the program prints "PASSED" and
!> terminates normally when dcheck_matrix returns 0, and stops with exit
!> code 1 otherwise or when the work arrays cannot be allocated.
!>
!> NOTE(review): per the oneMKL documentation ordering = 'C' selects
!> column-major layout and trans = 'T' a transposed copy, which matches the
!> choice lda = row, ldb = col below -- confirm against the oneMKL reference.
program domatcopy_batch_strided_example
#if defined(MKL_ILP64)
use onemkl_blas_omp_offload_ilp64
#else
use onemkl_blas_omp_offload_lp64
#endif
use common_blas

implicit none

character(len=1) :: ordering = 'C', trans = 'T'
integer :: row = 5, col = 3, batch_size = 10
integer :: lda, ldb, stridea, strideb, passed
integer :: alloc_stat
! Double-precision literal: a bare 1.2 would be rounded to single precision
! before being stored in alpha.
double precision :: alpha = 1.2d0
double precision, allocatable :: a(:,:), b(:,:), b_ref(:,:)

! Leading dimensions and per-matrix strides: each input matrix occupies
! lda*col elements and each output matrix ldb*row elements.
lda = row
ldb = col
stridea = lda * col
strideb = ldb * row

! One column per batch entry.  stat= is required so that an allocation
! failure is reported here instead of terminating the program inside allocate.
allocate(a(stridea,batch_size), b(strideb,batch_size), b_ref(strideb,batch_size), stat=alloc_stat)
if (alloc_stat /= 0) then
   ! Release whichever arrays were successfully allocated before the failure.
   if (allocated(a)) deallocate(a)
   if (allocated(b)) deallocate(b)
   if (allocated(b_ref)) deallocate(b_ref)
   print *, 'Error: cannot allocate matrices'
   stop 1
end if

! initialize matrices; b_ref starts as an exact copy of b so that both calls
! below see identical output buffers
call dinit_matrix('N', stridea, batch_size, stridea, a)
call dinit_matrix('N', strideb, batch_size, strideb, b)
call dcopy_matrix(strideb, batch_size, strideb, b, b_ref)

! Calling domatcopy_batch_strided on the CPU
call mkl_domatcopy_batch_strided(ordering, trans, row, col, alpha, a, lda, stridea, b_ref, ldb, strideb, batch_size)

! Calling domatcopy_batch_strided on the GPU
!$omp target data map(a,b)
!$omp dispatch
call mkl_domatcopy_batch_strided(ordering, trans, row, col, alpha, a, lda, stridea, b, ldb, strideb, batch_size)
!$omp end target data

! Compare result of CPU and GPU implementation
passed = dcheck_matrix(strideb, batch_size, strideb, b, b_ref)

deallocate(a)
deallocate(b)
deallocate(b_ref)

! A non-zero result from dcheck_matrix is treated as a mismatch.
if (passed /= 0) stop 1

print *, "PASSED"

end program domatcopy_batch_strided_example
