Babbage - PHASTA/phasta GitHub Wiki
The following instructions are for execution on the Babbage Phi/MIC only; the host processors are not used.
See the Babbage page for system details: https://www.nersc.gov/users/computational-systems/testbeds/babbage/
# Replace the default Intel MPI and Intel compiler modules with the 2016 beta
# releases, and make cmake available.
module swap impi/5.0.update1 impi/2016.beta_5.1.0
module swap intel/15.0.update1 intel/2016.beta
module load cmake

# Select the Intel C, C++ and Fortran compilers for the Intel MPI compiler
# wrappers.
export I_MPI_CC=icc
export I_MPI_CXX=icpc
export I_MPI_FC=ifort
Create 'BabbagePhi.cmake' with the following contents
# BabbagePhi.cmake
#
# CMake toolchain file for building natively for the Babbage Xeon Phi (MIC)
# cards with the Intel compilers. Pass it to cmake with
#   -DCMAKE_TOOLCHAIN_FILE=../BabbagePhi.cmake

# Target system and compilers.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_C_COMPILER icc)
set(CMAKE_CXX_COMPILER icpc)
set(CMAKE_Fortran_COMPILER ifort)
set(CMAKE_AR /usr/bin/ar CACHE STRING "" FORCE)

# Root of the Intel compiler/library installation and its MIC library
# directories. 'mklmic' is defined for convenience; nothing else in this file
# references it.
set(d "/opt/intel/parallel_studio_xe_2015_update2/compilers_and_libraries_2016.0.042/linux")
set(compilermic "${d}/compiler/lib/mic")
set(mklmic "${d}/mkl/lib/mic")

# -mmic builds for the MIC architecture. The C++ flags additionally point the
# linker at the MIC compiler runtime libraries.
set(cxxflags "-mmic -Wl,-rpath-link=${compilermic} -Wl,--as-needed ")

# Extra optimization options applied to Release builds only.
set(opt "-opt-assume-safe-padding -opt-streaming-stores always -opt-streaming-cache-evict=0")

# Flags used for every build type.
set(CMAKE_C_FLAGS -mmic CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS "${cxxflags}" CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS -mmic CACHE STRING "" FORCE)

# Flags added for CMAKE_BUILD_TYPE=Release. Fortran additionally requests
# 64-byte array alignment.
set(CMAKE_C_FLAGS_RELEASE "-mmic ${opt} " CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS_RELEASE "-mmic ${opt} " CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS_RELEASE "-mmic ${opt} -align array64byte " CACHE STRING "" FORCE)

# Locations searched by find_library()/find_path(): the k1om sysroot and the
# MIC build of Intel MPI.
set(CMAKE_FIND_ROOT_PATH
  /usr/linux-k1om-4.7/linux-k1om/usr/lib64/
  /usr/linux-k1om-4.7/linux-k1om/usr/
  ${d}/mpi/mic/lib/release_mt
  ${d}/mpi/mic/lib/
  ${d}/mpi/mic/
)

# Look for libraries and headers only under CMAKE_FIND_ROOT_PATH so host
# (non-MIC) copies are never picked up.
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
# Configure and build out of source, in a dedicated directory next to
# BabbagePhi.cmake.
mkdir buildPhi
cd buildPhi

# Configure an optimized build of the compressible solver only, using the
# Xeon Phi toolchain file.
cmake \
  -DCMAKE_TOOLCHAIN_FILE="../BabbagePhi.cmake" \
  -DCMAKE_BUILD_TYPE=Release \
  -DPHASTA_INCOMPRESSIBLE=OFF \
  -DPHASTA_COMPRESSIBLE=ON \
  ..

# VERBOSE=1 prints the full compile and link command lines.
make VERBOSE=1
Set up the environment as described above (the same module swap and export commands used for the build).
Create 'runPhi.sh' with the following contents
#!/bin/bash #Force use of the rendezvous protocol to reduce memory usage. export I_MPI_FABRICS=shm:dapl export I_MPI_DEBUG=5 #Force use of the rendezvous protocol to reduce memory usage. export I_MPI_EAGER_THRESHOLD=0 export I_MPI_INTRANODE_EAGER_THRESHOLD=0 #connectionless protocol - see the google doc for performance implications export I_MPI_DAPL_UD=enable #pin processes to cores export I_MPI_PIN_DOMAIN=core cd $PBS_O_WORKDIR get_micfile mpirun.mic -n <totalProcesses> -ppn <processesPerMIC> -hostfile micfile.$PBS_JOBID <executable>
Alternatively, to perform multiple runs within a single job/allocation, place the following into 'runPhi.sh' instead:
#!/bin/bash -x
# Batch script that performs several runs inside one job/allocation: one run
# per combination of executable (debug, optimized) and DAPL UD setting
# (disable, enable). Each run logs to its own file.

# Placeholders - replace the right-hand sides with real values.
p=totalNumberOfProcesses
ppn=processesPerPhi
numstart=startingTimeStep

# Run from the directory the job was submitted from; abort if it is missing
# rather than launching from the wrong place.
cd "$PBS_O_WORKDIR" || exit 1

module swap impi/5.0.update1 impi/2016.beta_5.1.0
module swap intel/15.0.update1 intel/2016.beta

export I_MPI_FABRICS=shm:dapl
export I_MPI_DEBUG=5

#Force use of the rendezvous protocol to reduce memory usage.
export I_MPI_EAGER_THRESHOLD=0
export I_MPI_INTRANODE_EAGER_THRESHOLD=0

#pin processes to cores
export I_MPI_PIN_DOMAIN=core

# Placeholders - paths to the two builds being compared.
dbg=/path/to/debug/phastaC.exe
opt=/path/to/optimized/phastaC.exe

# NOTE(review): presumably creates the micfile.$PBS_JOBID host file used
# below - confirm against the Babbage documentation.
get_micfile

# 'exe' holds the NAME of a variable (dbg or opt); "${!exe}" expands to that
# variable's value, i.e. the executable path.
for exe in dbg opt; do
  for ud in 'disable' 'enable'; do
    #connectionless protocol
    export I_MPI_DAPL_UD="${ud}"

    # Write the starting time step into the case directory before every run.
    echo "$numstart" > "${p}-procs_case/numstart.dat"

    mpirun.mic -n "$p" -hostfile "micfile.$PBS_JOBID" -ppn "$ppn" "${!exe}" \
      &> "r${p}.ppn${ppn}.ud${ud}.${exe}.${PBS_JOBID}.log"
  done
done
Note: '-n' specifies the total number of host processes. Since we are not running on the host processors, we set '-n' equal to the number of nodes; a value of zero is not valid.
# Submit from the case directory; replace numNodes and HH:MM:SS with the
# node count and wall-clock limit for the job.
cd path/to/case/directory # should contain a N-procs_case sub-directory
qsub -l nodes=numNodes -l walltime=HH:MM:SS ./runPhi.sh