Babbage - PHASTA/phasta GitHub Wiki
The following instructions are for execution on the Babbage Phi/MIC only; the host processors are not used.
See the Babbage page for system details https://www.nersc.gov/users/computational-systems/testbeds/babbage/
# Switch to the 2016 beta Intel MPI and Intel compiler modules, then load CMake.
module swap impi/5.0.update1 impi/2016.beta_5.1.0
module swap intel/15.0.update1 intel/2016.beta
module load cmake
# Select the Intel compilers as the back ends of the Intel MPI compiler wrappers.
export I_MPI_CC=icc
export I_MPI_CXX=icpc
export I_MPI_FC=ifort
Create 'BabbagePhi.cmake' with the following contents
# Toolchain file for building with the Intel compilers for the Babbage
# Xeon Phi (MIC) cards. Pass it to CMake via -DCMAKE_TOOLCHAIN_FILE.
set(CMAKE_SYSTEM_NAME Linux)

# Intel compiler drivers, one per language.
set(CMAKE_C_COMPILER "icc")
set(CMAKE_CXX_COMPILER "icpc")
set(CMAKE_Fortran_COMPILER "ifort")

# Pin the archiver to the system 'ar'.
set(CMAKE_AR "/usr/bin/ar" CACHE STRING "" FORCE)

# Root of the Intel installation and the MIC library directories beneath it.
set(d "/opt/intel/parallel_studio_xe_2015_update2/compilers_and_libraries_2016.0.042/linux")
set(compilermic "${d}/compiler/lib/mic")
# NOTE(review): mklmic is not referenced again in this file; it is kept in
# case the including project reads it - confirm before removing.
set(mklmic "${d}/mkl/lib/mic")

# Flag strings shared by the cache entries below. The trailing spaces are
# part of the original values and are preserved exactly.
set(opt "-opt-assume-safe-padding -opt-streaming-stores always -opt-streaming-cache-evict=0")
set(cxxflags "-mmic -Wl,-rpath-link=${compilermic} -Wl,--as-needed ")
set(release_common "-mmic ${opt} ")

# Base flags for every build type. Only C++ additionally carries the
# rpath-link to the MIC compiler libraries and --as-needed.
set(CMAKE_C_FLAGS "-mmic" CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS "-mmic" CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS "${cxxflags}" CACHE STRING "" FORCE)

# Release flags; Fortran additionally requests 64-byte array alignment.
set(CMAKE_C_FLAGS_RELEASE "${release_common}" CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS_RELEASE "${release_common}" CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS_RELEASE "${release_common}-align array64byte " CACHE STRING "" FORCE)

# Prefixes searched by find_library()/find_path(): the k1om sysroot and the
# MIC build of Intel MPI.
set(CMAKE_FIND_ROOT_PATH
  "/usr/linux-k1om-4.7/linux-k1om/usr/lib64/"
  "/usr/linux-k1om-4.7/linux-k1om/usr/"
  "${d}/mpi/mic/lib/release_mt"
  "${d}/mpi/mic/lib/"
  "${d}/mpi/mic/"
)

# Restrict library and header lookups to the root paths listed above.
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
# Configure and build out-of-source in a dedicated directory.
mkdir buildPhi
cd buildPhi
# Configure a Release build of the compressible solver with the Phi toolchain file.
cmake \
  -DCMAKE_TOOLCHAIN_FILE=../BabbagePhi.cmake \
  -DCMAKE_BUILD_TYPE=Release \
  -DPHASTA_INCOMPRESSIBLE=OFF \
  -DPHASTA_COMPRESSIBLE=ON \
  ..
# VERBOSE=1 prints the full compile and link command lines.
make VERBOSE=1
For the runtime environment, see the module and export commands above.
Create 'runPhi.sh' with the following contents
#!/bin/bash #Force use of the rendezvous protocol to reduce memory usage. export I_MPI_FABRICS=shm:dapl export I_MPI_DEBUG=5 #Force use of the rendezvous protocol to reduce memory usage. export I_MPI_EAGER_THRESHOLD=0 export I_MPI_INTRANODE_EAGER_THRESHOLD=0 #connectionless protocol - see the google doc for performance implications export I_MPI_DAPL_UD=enable #pin processes to cores export I_MPI_PIN_DOMAIN=core cd $PBS_O_WORKDIR get_micfile mpirun.mic -n <totalProcesses> -ppn <processesPerMIC> -hostfile micfile.$PBS_JOBID <executable>
Alternatively, to do multiple runs within a single job/allocation, place the following into 'runPhi.sh':
#!/bin/bash -x
# Multi-run job script: runs every combination of executable (debug and
# optimized) and DAPL UD setting (disable and enable) inside one allocation,
# writing one log file per run.

# Placeholders: replace with real values before submitting.
p=totalNumberOfProcesses
ppn=processesPerPhi
numstart=startingTimeStep

# Run from the directory the job was submitted from; stop if it is
# unreachable so numstart.dat and the logs are not written elsewhere.
cd "$PBS_O_WORKDIR" || exit 1

module swap impi/5.0.update1 impi/2016.beta_5.1.0
module swap intel/15.0.update1 intel/2016.beta

export I_MPI_FABRICS=shm:dapl
export I_MPI_DEBUG=5
#Force use of the rendezvous protocol to reduce memory usage.
export I_MPI_EAGER_THRESHOLD=0
export I_MPI_INTRANODE_EAGER_THRESHOLD=0
#pin processes to cores
export I_MPI_PIN_DOMAIN=core

# Placeholders: paths to the two builds being compared.
dbg=/path/to/debug/phastaC.exe
opt=/path/to/optimized/phastaC.exe

get_micfile

for exe in dbg opt; do
  for ud in 'disable' 'enable'; do
    #connectionless protocol
    export I_MPI_DAPL_UD=${ud}
    # Rewrite numstart.dat before each run so every run begins at $numstart.
    echo "$numstart" > "${p}-procs_case/numstart.dat"
    # ${!exe} is bash indirect expansion: it yields the value of the variable
    # named by $exe, i.e. the path stored in dbg or opt.
    mpirun.mic -n "$p" -hostfile "micfile.$PBS_JOBID" -ppn "$ppn" "${!exe}" \
      &> "r${p}.ppn${ppn}.ud${ud}.${exe}.${PBS_JOBID}.log"
  done
done
Note: '-n' specifies the total number of host processes. Since we are not running on the host processors, set '-n' equal to the number of nodes; a value of zero is not valid.
cd path/to/case/directory # should contain a N-procs_case sub-directory
# Replace numNodes and HH:MM:SS with the node count and wall-clock limit.
qsub -l nodes=numNodes -l walltime=HH:MM:SS ./runPhi.sh