diff --git a/Makefile b/Makefile index 254727688e8368cb48e5610cccfa075332210294..8f15e484e29b60aee8f4046c1824867c0a8dd9ad 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ EXE = $(SRC:.c=.x) build: $(EXE) -.PHONY: t test test_showenvironment test_run +.PHONY: t test t: test test: $(EXE) run.sh @set -e; \ diff --git a/run.sh b/run.sh index 5ec6e06e827d173f2a71d983af18abe1c757c2dc..13caa6f446dd95b796edee1881434a2bff349d15 100755 --- a/run.sh +++ b/run.sh @@ -25,5 +25,17 @@ mpirun -n 48 -ppn 24 -host h5,h6 $PWD/$1 echo "-- Running on h9,h10 dual nodes" mpirun -n 48 -ppn 24 -host h9,h10 $PWD/$1 + +echo "-- Running on node1 single node" +mpirun -np 16 -host node1 $PWD/$1 + +echo "-- Running on node3 single node" +mpirun -np 16 -host node3 $PWD/$1 + +echo "-- Running on node1,node2 dual nodes" +mpirun -n 16 -ppn 8 -host node1,node2 $PWD/$1 + +echo "-- Running on node3,node4 dual nodes" +mpirun -n 16 -ppn 8 -host node3,node4 $PWD/$1 echo "******************************************************************************" echo -e "\n\n\n\n" diff --git a/test_result.txt b/test_result.txt index 9727808dbdf80949c4987dd3c7998e24307b63a0..e83624ae19ea165d02714b0a087a6a8e57bbbbca 100644 --- a/test_result.txt +++ b/test_result.txt @@ -1,47 +1,47 @@ ****************************************************************************** -Running test_alltoall.x at 2023-04-29 04:37:25 +Running test_alltoall.x at 2023-04-29 05:13:09 ****************************************************************************** Libraries used: - linux-vdso.so.1 => (0x00007ffcbd50b000) - libmpifort.so.12 => /public/apps/intel/oneapi/mpi/2021.2.0/lib/libmpifort.so.12 (0x00007ff3bf89a000) - libmpi.so.12 => /public/apps/intel/oneapi/mpi/2021.2.0/lib/release/libmpi.so.12 (0x00007ff3be5d4000) - librt.so.1 => /lib64/librt.so.1 (0x00007ff3be3cc000) - libpthread.so.0 => /lib64/libpthread.so.0 (0x00007ff3be1b0000) - libdl.so.2 => /lib64/libdl.so.2 (0x00007ff3bdfac000) - libc.so.6 => /lib64/libc.so.6 (0x00007ff3bdbde000) - libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x00007ff3bd9c8000) - libfabric.so.1 => /public/apps/intel/oneapi/mpi/2021.2.0/libfabric/lib/libfabric.so.1 (0x00007ff3bd782000) - /lib64/ld-linux-x86-64.so.2 (0x00007ff3bfc58000) + linux-vdso.so.1 => (0x00007ffcd751f000) + libmpifort.so.12 => /public/apps/intel/oneapi/mpi/2021.2.0/lib/libmpifort.so.12 (0x00007fc1842e8000) + libmpi.so.12 => /public/apps/intel/oneapi/mpi/2021.2.0/lib/release/libmpi.so.12 (0x00007fc183022000) + librt.so.1 => /lib64/librt.so.1 (0x00007fc182e1a000) + libpthread.so.0 => /lib64/libpthread.so.0 (0x00007fc182bfe000) + libdl.so.2 => /lib64/libdl.so.2 (0x00007fc1829fa000) + libc.so.6 => /lib64/libc.so.6 (0x00007fc18262c000) + libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x00007fc182416000) + libfabric.so.1 => /public/apps/intel/oneapi/mpi/2021.2.0/libfabric/lib/libfabric.so.1 (0x00007fc1821d0000) + /lib64/ld-linux-x86-64.so.2 (0x00007fc1846a6000) -- Running on iSciNat ================================================================================ Compiler type: GCC 4.8.5 20150623 (Red Hat 4.8.5-44) Running with 48 ranks ================================================================================ - * Profiling throughput of 0 GB 192 MB per rank ... time taken in MPI_Alltoall: 0 s 86 ms - * Profiling throughput of 0 GB 384 MB per rank ... time taken in MPI_Alltoall: 0 s 226 ms + * Profiling throughput of 0 GB 192 MB per rank ... time taken in MPI_Alltoall: 0 s 88 ms + * Profiling throughput of 0 GB 384 MB per rank ... time taken in MPI_Alltoall: 0 s 216 ms * Profiling throughput of 0 GB 768 MB per rank ... time taken in MPI_Alltoall: 0 s 369 ms - * Profiling throughput of 1 GB 512 MB per rank ... time taken in MPI_Alltoall: 0 s 776 ms + * Profiling throughput of 1 GB 512 MB per rank ... time taken in MPI_Alltoall: 0 s 895 ms ================================================================================ -- Running on h5 single node ================================================================================ Compiler type: GCC 4.8.5 20150623 (Red Hat 4.8.5-44) Running with 48 ranks ================================================================================ - * Profiling throughput of 0 GB 192 MB per rank ... time taken in MPI_Alltoall: 0 s 135 ms + * Profiling throughput of 0 GB 192 MB per rank ... time taken in MPI_Alltoall: 0 s 137 ms * Profiling throughput of 0 GB 384 MB per rank ... time taken in MPI_Alltoall: 0 s 274 ms - * Profiling throughput of 0 GB 768 MB per rank ... time taken in MPI_Alltoall: 0 s 548 ms - * Profiling throughput of 1 GB 512 MB per rank ... time taken in MPI_Alltoall: 1 s 93 ms + * Profiling throughput of 0 GB 768 MB per rank ... time taken in MPI_Alltoall: 0 s 547 ms + * Profiling throughput of 1 GB 512 MB per rank ... time taken in MPI_Alltoall: 1 s 94 ms ================================================================================ -- Running on h9 single node ================================================================================ Compiler type: GCC 4.8.5 20150623 (Red Hat 4.8.5-44) Running with 48 ranks ================================================================================ - * Profiling throughput of 0 GB 192 MB per rank ... time taken in MPI_Alltoall: 0 s 130 ms + * Profiling throughput of 0 GB 192 MB per rank ... time taken in MPI_Alltoall: 0 s 129 ms * Profiling throughput of 0 GB 384 MB per rank ... time taken in MPI_Alltoall: 0 s 263 ms * Profiling throughput of 0 GB 768 MB per rank ... time taken in MPI_Alltoall: 0 s 528 ms - * Profiling throughput of 1 GB 512 MB per rank ... time taken in MPI_Alltoall: 1 s 41 ms + * Profiling throughput of 1 GB 512 MB per rank ... time taken in MPI_Alltoall: 1 s 45 ms ================================================================================ -- Running on h5,h6 dual nodes ================================================================================ @@ -50,18 +50,58 @@ Libraries used: ================================================================================ * Profiling throughput of 0 GB 192 MB per rank ... time taken in MPI_Alltoall: 0 s 204 ms * Profiling throughput of 0 GB 384 MB per rank ... time taken in MPI_Alltoall: 0 s 422 ms - * Profiling throughput of 0 GB 768 MB per rank ... time taken in MPI_Alltoall: 0 s 846 ms - * Profiling throughput of 1 GB 512 MB per rank ... time taken in MPI_Alltoall: 1 s 687 ms + * Profiling throughput of 0 GB 768 MB per rank ... time taken in MPI_Alltoall: 0 s 847 ms + * Profiling throughput of 1 GB 512 MB per rank ... time taken in MPI_Alltoall: 1 s 688 ms ================================================================================ -- Running on h9,h10 dual nodes ================================================================================ Compiler type: GCC 4.8.5 20150623 (Red Hat 4.8.5-44) Running with 48 ranks ================================================================================ - * Profiling throughput of 0 GB 192 MB per rank ... time taken in MPI_Alltoall: 0 s 342 ms - * Profiling throughput of 0 GB 384 MB per rank ... time taken in MPI_Alltoall: 0 s 623 ms - * Profiling throughput of 0 GB 768 MB per rank ... time taken in MPI_Alltoall: 1 s 279 ms - * Profiling throughput of 1 GB 512 MB per rank ... time taken in MPI_Alltoall: 2 s 561 ms + * Profiling throughput of 0 GB 192 MB per rank ... time taken in MPI_Alltoall: 0 s 392 ms + * Profiling throughput of 0 GB 384 MB per rank ... time taken in MPI_Alltoall: 0 s 635 ms + * Profiling throughput of 0 GB 768 MB per rank ... time taken in MPI_Alltoall: 1 s 225 ms + * Profiling throughput of 1 GB 512 MB per rank ... time taken in MPI_Alltoall: 2 s 437 ms +================================================================================ +-- Running on node1 single node +================================================================================ + Compiler type: GCC 4.8.5 20150623 (Red Hat 4.8.5-44) + Running with 16 ranks +================================================================================ + * Profiling throughput of 0 GB 64 MB per rank ... time taken in MPI_Alltoall: 0 s 44 ms + * Profiling throughput of 0 GB 128 MB per rank ... time taken in MPI_Alltoall: 0 s 85 ms + * Profiling throughput of 0 GB 256 MB per rank ... time taken in MPI_Alltoall: 0 s 175 ms + * Profiling throughput of 0 GB 512 MB per rank ... time taken in MPI_Alltoall: 0 s 349 ms +================================================================================ +-- Running on node3 single node +================================================================================ + Compiler type: GCC 4.8.5 20150623 (Red Hat 4.8.5-44) + Running with 16 ranks +================================================================================ + * Profiling throughput of 0 GB 64 MB per rank ... time taken in MPI_Alltoall: 0 s 42 ms + * Profiling throughput of 0 GB 128 MB per rank ... time taken in MPI_Alltoall: 0 s 89 ms + * Profiling throughput of 0 GB 256 MB per rank ... time taken in MPI_Alltoall: 0 s 162 ms + * Profiling throughput of 0 GB 512 MB per rank ... time taken in MPI_Alltoall: 0 s 352 ms +================================================================================ +-- Running on node1,node2 dual nodes +================================================================================ + Compiler type: GCC 4.8.5 20150623 (Red Hat 4.8.5-44) + Running with 16 ranks +================================================================================ + * Profiling throughput of 0 GB 64 MB per rank ... time taken in MPI_Alltoall: 0 s 53 ms + * Profiling throughput of 0 GB 128 MB per rank ... time taken in MPI_Alltoall: 0 s 104 ms + * Profiling throughput of 0 GB 256 MB per rank ... time taken in MPI_Alltoall: 0 s 213 ms + * Profiling throughput of 0 GB 512 MB per rank ... time taken in MPI_Alltoall: 0 s 430 ms +================================================================================ +-- Running on node3,node4 dual nodes +================================================================================ + Compiler type: GCC 4.8.5 20150623 (Red Hat 4.8.5-44) + Running with 16 ranks +================================================================================ + * Profiling throughput of 0 GB 64 MB per rank ... time taken in MPI_Alltoall: 0 s 388 ms + * Profiling throughput of 0 GB 128 MB per rank ... time taken in MPI_Alltoall: 0 s 619 ms + * Profiling throughput of 0 GB 256 MB per rank ... time taken in MPI_Alltoall: 1 s 24 ms + * Profiling throughput of 0 GB 512 MB per rank ... time taken in MPI_Alltoall: 1 s 935 ms ================================================================================ ****************************************************************************** @@ -70,35 +110,47 @@ Libraries used: ****************************************************************************** -Running test_tyy1.x at 2023-04-29 04:41:30 +Running test_tyy1.x at 2023-04-29 05:19:25 ****************************************************************************** Libraries used: - linux-vdso.so.1 => (0x00007ffc431c8000) - libmpifort.so.12 => /public/apps/intel/oneapi/mpi/2021.2.0/lib/libmpifort.so.12 (0x00007febf203a000) - libmpi.so.12 => /public/apps/intel/oneapi/mpi/2021.2.0/lib/release/libmpi.so.12 (0x00007febf0d74000) - librt.so.1 => /lib64/librt.so.1 (0x00007febf0b6c000) - libpthread.so.0 => /lib64/libpthread.so.0 (0x00007febf0950000) - libdl.so.2 => /lib64/libdl.so.2 (0x00007febf074c000) - libc.so.6 => /lib64/libc.so.6 (0x00007febf037e000) - libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x00007febf0168000) - libfabric.so.1 => /public/apps/intel/oneapi/mpi/2021.2.0/libfabric/lib/libfabric.so.1 (0x00007febeff22000) - /lib64/ld-linux-x86-64.so.2 (0x00007febf23f8000) + linux-vdso.so.1 => (0x00007ffdaff59000) + libmpifort.so.12 => /public/apps/intel/oneapi/mpi/2021.2.0/lib/libmpifort.so.12 (0x00007f2048dbd000) + libmpi.so.12 => /public/apps/intel/oneapi/mpi/2021.2.0/lib/release/libmpi.so.12 (0x00007f2047af7000) + librt.so.1 => /lib64/librt.so.1 (0x00007f20478ef000) + libpthread.so.0 => /lib64/libpthread.so.0 (0x00007f20476d3000) + libdl.so.2 => /lib64/libdl.so.2 (0x00007f20474cf000) + libc.so.6 => /lib64/libc.so.6 (0x00007f2047101000) + libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x00007f2046eeb000) + libfabric.so.1 => /public/apps/intel/oneapi/mpi/2021.2.0/libfabric/lib/libfabric.so.1 (0x00007f2046ca5000) + /lib64/ld-linux-x86-64.so.2 (0x00007f204917b000) -- Running on iSciNat -Rank 0, time with communication: 11.400389s. -Rank 0, time without communication: 0.000022s. --- Running on h5 single node -Rank 0, time with communication: 2.208974s. +Rank 0, time with communication: 11.702381s. Rank 0, time without communication: 0.000031s. +-- Running on h5 single node +Rank 0, time with communication: 2.236161s. +Rank 0, time without communication: 0.000035s. -- Running on h9 single node -Rank 0, time with communication: 16.315345s. -Rank 0, time without communication: 0.000030s. --- Running on h5,h6 dual nodes -Rank 0, time with communication: 0.699251s. +Rank 0, time with communication: 15.647274s. Rank 0, time without communication: 0.000031s. +-- Running on h5,h6 dual nodes +Rank 0, time with communication: 0.654246s. +Rank 0, time without communication: 0.000028s. -- Running on h9,h10 dual nodes -Rank 0, time with communication: 1.742161s. -Rank 0, time without communication: 0.000055s. +Rank 0, time with communication: 1.763536s. +Rank 0, time without communication: 0.000025s. +-- Running on node1 single node +Rank 0, time with communication: 1.810400s. +Rank 0, time without communication: 0.000038s. +-- Running on node3 single node +Rank 0, time with communication: 4.862942s. +Rank 0, time without communication: 0.000044s. +-- Running on node1,node2 dual nodes +Rank 0, time with communication: 1.658208s. +Rank 0, time without communication: 0.000054s. +-- Running on node3,node4 dual nodes +Rank 0, time with communication: 6.210203s. +Rank 0, time without communication: 0.000069s. ****************************************************************************** @@ -106,45 +158,65 @@ Rank 0, time without communication: 0.000055s. ****************************************************************************** -Running test_tyy2.x at 2023-04-29 04:43:05 +Running test_tyy2.x at 2023-04-29 05:22:16 ****************************************************************************** Libraries used: - linux-vdso.so.1 => (0x00007ffc581d1000) - libmpifort.so.12 => /public/apps/intel/oneapi/mpi/2021.2.0/lib/libmpifort.so.12 (0x00007ff05f256000) - libmpi.so.12 => /public/apps/intel/oneapi/mpi/2021.2.0/lib/release/libmpi.so.12 (0x00007ff05df90000) - librt.so.1 => /lib64/librt.so.1 (0x00007ff05dd88000) - libpthread.so.0 => /lib64/libpthread.so.0 (0x00007ff05db6c000) - libdl.so.2 => /lib64/libdl.so.2 (0x00007ff05d968000) - libc.so.6 => /lib64/libc.so.6 (0x00007ff05d59a000) - libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x00007ff05d384000) - libfabric.so.1 => /public/apps/intel/oneapi/mpi/2021.2.0/libfabric/lib/libfabric.so.1 (0x00007ff05d13e000) - /lib64/ld-linux-x86-64.so.2 (0x00007ff05f614000) + linux-vdso.so.1 => (0x00007ffe6f9cb000) + libmpifort.so.12 => /public/apps/intel/oneapi/mpi/2021.2.0/lib/libmpifort.so.12 (0x00007f1a7e143000) + libmpi.so.12 => /public/apps/intel/oneapi/mpi/2021.2.0/lib/release/libmpi.so.12 (0x00007f1a7ce7d000) + librt.so.1 => /lib64/librt.so.1 (0x00007f1a7cc75000) + libpthread.so.0 => /lib64/libpthread.so.0 (0x00007f1a7ca59000) + libdl.so.2 => /lib64/libdl.so.2 (0x00007f1a7c855000) + libc.so.6 => /lib64/libc.so.6 (0x00007f1a7c487000) + libgcc_s.so.1 => /lib64/libgcc_s.so.1 (0x00007f1a7c271000) + libfabric.so.1 => /public/apps/intel/oneapi/mpi/2021.2.0/libfabric/lib/libfabric.so.1 (0x00007f1a7c02b000) + /lib64/ld-linux-x86-64.so.2 (0x00007f1a7e501000) -- Running on iSciNat -Rank 0, alltoallcplx with comm: 0.290525s. -Rank 0, alltoallcplx with comm: 4.339673s. -Rank 0, alltoallint with comm: 0.241709s. -Rank 0, alltoallint with comm: 4.676460s. +Rank 0, alltoallcplx with comm: 0.269693s. +Rank 0, alltoallcplx with comm: 4.413800s. +Rank 0, alltoallint with comm: 0.243539s. +Rank 0, alltoallint with comm: 4.772937s. -- Running on h5 single node -Rank 0, alltoallcplx with comm: 0.326536s. -Rank 0, alltoallcplx with comm: 1.420922s. -Rank 0, alltoallint with comm: 0.155849s. -Rank 0, alltoallint with comm: 1.423225s. +Rank 0, alltoallcplx with comm: 0.248864s. +Rank 0, alltoallcplx with comm: 1.425570s. +Rank 0, alltoallint with comm: 0.155677s. +Rank 0, alltoallint with comm: 1.422398s. -- Running on h9 single node -Rank 0, alltoallcplx with comm: 0.109262s. -Rank 0, alltoallcplx with comm: 9.138584s. -Rank 0, alltoallint with comm: 0.118206s. -Rank 0, alltoallint with comm: 9.854484s. +Rank 0, alltoallcplx with comm: 0.109989s. +Rank 0, alltoallcplx with comm: 9.632547s. +Rank 0, alltoallint with comm: 0.111610s. +Rank 0, alltoallint with comm: 9.651179s. -- Running on h5,h6 dual nodes -Rank 0, alltoallcplx with comm: 0.160791s. -Rank 0, alltoallcplx with comm: 0.553479s. -Rank 0, alltoallint with comm: 0.380782s. -Rank 0, alltoallint with comm: 0.561677s. +Rank 0, alltoallcplx with comm: 0.142842s. +Rank 0, alltoallcplx with comm: 0.520999s. +Rank 0, alltoallint with comm: 0.410757s. +Rank 0, alltoallint with comm: 0.543206s. -- Running on h9,h10 dual nodes -Rank 0, alltoallcplx with comm: 0.252528s. -Rank 0, alltoallcplx with comm: 1.479089s. -Rank 0, alltoallint with comm: 0.137398s. -Rank 0, alltoallint with comm: 1.477327s. +Rank 0, alltoallcplx with comm: 0.236817s. +Rank 0, alltoallcplx with comm: 1.442059s. +Rank 0, alltoallint with comm: 0.147068s. +Rank 0, alltoallint with comm: 1.440483s. +-- Running on node1 single node +Rank 0, alltoallcplx with comm: 0.311662s. +Rank 0, alltoallcplx with comm: 1.230030s. +Rank 0, alltoallint with comm: 0.283278s. +Rank 0, alltoallint with comm: 1.187194s. +-- Running on node3 single node +Rank 0, alltoallcplx with comm: 0.951608s. +Rank 0, alltoallcplx with comm: 3.167985s. +Rank 0, alltoallint with comm: 0.797970s. +Rank 0, alltoallint with comm: 3.027122s. +-- Running on node1,node2 dual nodes +Rank 0, alltoallcplx with comm: 0.307534s. +Rank 0, alltoallcplx with comm: 1.198317s. +Rank 0, alltoallint with comm: 0.496882s. +Rank 0, alltoallint with comm: 1.138275s. +-- Running on node3,node4 dual nodes +Rank 0, alltoallcplx with comm: 1.202530s. +Rank 0, alltoallcplx with comm: 6.652988s. +Rank 0, alltoallint with comm: 0.656113s. +Rank 0, alltoallint with comm: 4.514347s. ******************************************************************************