# HOOMD-blue

[TOC]

## Codes

```bash
# Source code
git clone --recursive https://github.com/glotzerlab/hoomd-blue
# Benchmarks
git clone https://github.com/glotzerlab/hoomd-benchmarks
```

## Conda

```bash
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda3-latest-Linux-x86_64.sh
conda update -n base conda
conda install --solver=classic conda-forge::conda-libmamba-solver conda-forge::libmamba conda-forge::libmambapy conda-forge::libarchive
conda create -p hoomdpy312 python=3.12 -y
conda activate /home/552/es6155/hoomdpy312
```

## Dependencies

Install the Python prerequisites, bump the pybind11 version pinned inside `install-prereq-headers.py` to 2.13.4, then let the script install the header-only dependencies. On Gadi:

```bash
pip install pybind11 numpy gsd
sed -i 's|pybind11/archive/v2.10.1.tar.gz|pybind11/archive/v2.13.4.tar.gz|g' /scratch/pi13/hoomd/hoomd-blue/install-prereq-headers.py
python3 /scratch/pi13/hoomd/hoomd-blue/install-prereq-headers.py -y
```

The same steps on Aspire, with only the repository path changed:

```bash
pip install pybind11 numpy gsd
sed -i 's|pybind11/archive/v2.10.1.tar.gz|pybind11/archive/v2.13.4.tar.gz|g' /home/users/industry/ai-hpc/apacsc39/hoomd-blue/install-prereq-headers.py
python3 /home/users/industry/ai-hpc/apacsc39/hoomd-blue/install-prereq-headers.py -y
```

## Compile

```bash
module load openmpi/4.1.5
module load gcc/12.2.0

export CFLAGS="-Ofast -march=native -mtune=native -funroll-loops -fprefetch-loop-arrays -ftree-vectorize -fstack-protector-strong -fno-plt -fPIC -pipe"
export CXXFLAGS="$CFLAGS"
export LDFLAGS="-Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--gc-sections"

# Prefix PATH with the conda env's bin so CMake picks up its Python
PATH=hoomdgcc2/bin:$PATH \
cmake -B build/hoomd_gcc_flag -S /scratch/pi13/hoomd/hoomd-blue \
    -D ENABLE_MPI=on -D MPI_HOME=/apps/openmpi/4.1.5 \
    -D pybind11_DIR=$PWD/hoomdgcc2/lib/python3.12/site-packages/pybind11/share/cmake/pybind11 \
    -D Eigen3_DIR=$PWD/hoomdgcc2/share/eigen3/cmake \
    -D cereal_DIR=$PWD/hoomdgcc2/lib64/cmake/cereal \
    -D CMAKE_C_FLAGS="$CFLAGS" \
    -D CMAKE_CXX_FLAGS="$CXXFLAGS" \
    -D CMAKE_EXE_LINKER_FLAGS="$LDFLAGS"
cmake --build build/hoomd_gcc_flag -j4  # building with -j > 4 blows up
```

### Gadi

#### gcc11

- openmpi/4.1.4
- cuda/12.5.1

![image](https://hackmd.io/_uploads/SJwIwyzaR.png)

#### Intel 2024.2.0

```bash
module load intel-compiler-llvm/2024.2.0 intel-mpi/2021.13.0

export CXXFLAGS="-Ofast -march=broadwell -axSKYLAKE-AVX512,CASCADELAKE,SAPPHIRERAPIDS -mtune=native -funroll-loops -flto -qopenmp -qopt-prefetch -qopt-streaming-stores always -fp-model fast -qparallel -fopt-info-vec-all"
export CFLAGS="$CXXFLAGS"

PATH=hoomdintel/bin:$PATH \
cmake -B build/hoomd_intel_v2 -S /scratch/pi13/hoomd/hoomd-blue \
    -D CMAKE_C_COMPILER=icx \
    -D CMAKE_CXX_COMPILER=icpx \
    -D MPI_C_COMPILER=mpicc \
    -D MPI_CXX_COMPILER=mpicxx \
    -D ENABLE_MPI=on -D MPI_HOME=$I_MPI_ROOT \
    -D pybind11_DIR=$PWD/hoomdintel/lib/python3.12/site-packages/pybind11/share/cmake/pybind11 \
    -D Eigen3_DIR=$PWD/hoomdintel/share/eigen3/cmake \
    -D cereal_DIR=$PWD/hoomdintel/lib64/cmake/cereal \
    -D ENABLE_TBB=on \
    -D TBB_DIR=/apps/intel-tools/.packages/2024.2.0/tbb/2021.13/lib/cmake/tbb \
    -D CMAKE_CXX_FLAGS="$CXXFLAGS" \
    -D CMAKE_C_FLAGS="$CFLAGS"
cmake --build build/hoomd_intel_v2 -j4
```

#### GCC+Intel-MPI

```bash
module load intel-mpi/2021.13.0
module load gcc/12.2.0
module load intel-tbb/2021.13.0

export CFLAGS="-Ofast -march=native -mtune=native -funroll-loops -fprefetch-loop-arrays -ftree-vectorize -fstack-protector-strong -fno-plt -fPIC -pipe"
export CXXFLAGS="$CFLAGS"
export LDFLAGS="-Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--gc-sections"

PATH=hoomdpy312llvm/bin:$PATH \
cmake -B build/hoomd_gcc_intelmpi -S /scratch/pi13/hoomd/hoomd-blue \
    -D ENABLE_MPI=on \
    -D MPI_HOME=/apps/intel-tools/intel-mpi/2021.13.0 \
    -D pybind11_DIR=$PWD/hoomdpy312llvm/lib/python3.12/site-packages/pybind11/share/cmake/pybind11 \
    -D Eigen3_DIR=$PWD/hoomdpy312llvm/share/eigen3/cmake \
    -D cereal_DIR=$PWD/hoomdpy312llvm/lib64/cmake/cereal \
    -D ENABLE_TBB=on \
    -D TBB_DIR=/apps/intel-tools/intel-tbb/2021.13.0/lib/cmake/tbb \
    -D CMAKE_C_FLAGS="$CFLAGS" \
    -D CMAKE_CXX_FLAGS="$CXXFLAGS" \
    -D CMAKE_EXE_LINKER_FLAGS="$LDFLAGS"
cmake --build build/hoomd_gcc_intelmpi -j4
```

### Aspire

#### GCC

- openmpi/4.1.5-gcc11
- cuda/12.2.2

![image](https://hackmd.io/_uploads/rypWPkf6C.png)

```bash
module load gcc/11.2.0
module load openmpi/4.1.5-gcc11

export CFLAGS="-O3 -march=native -mtune=native -funroll-loops -fprefetch-loop-arrays -ftree-vectorize -fstack-protector-strong -fno-plt -fPIC -pipe"
export CXXFLAGS="$CFLAGS"
export LDFLAGS="-Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--gc-sections"

PATH=hoomdpy312/bin:$PATH \
cmake -B build/hoomd_gcc -S $PWD/hoomd-blue \
    -D ENABLE_MPI=on -D MPI_HOME=/app/apps/openmpi/4.1.5-gcc11 \
    -D pybind11_DIR=$PWD/hoomdpy312/lib/python3.12/site-packages/pybind11/share/cmake/pybind11 \
    -D Eigen3_DIR=$PWD/hoomdpy312/share/eigen3/cmake \
    -D cereal_DIR=$PWD/hoomdpy312/lib64/cmake/cereal \
    -D CMAKE_C_FLAGS="$CFLAGS" \
    -D CMAKE_CXX_FLAGS="$CXXFLAGS" \
    -D CMAKE_EXE_LINKER_FLAGS="$LDFLAGS"
cmake --build build/hoomd_gcc -j4
```

#### AOCC 4.0.0

```bash
module load aocc/4.0.0
module load openmpi/4.1.5-aocc4

export CXXFLAGS="-O3 -march=znver3 -ffast-math -flto -fopenmp -fvectorize"
export CFLAGS="$CXXFLAGS"

PATH=hoomdpy312/bin:$PATH \
cmake -B build/hoomd_aocc -S $PWD/hoomd-blue \
    -D CMAKE_C_COMPILER=clang \
    -D CMAKE_CXX_COMPILER=clang++ \
    -D MPI_C_COMPILER=mpicc \
    -D MPI_CXX_COMPILER=mpicxx \
    -D ENABLE_MPI=on -D MPI_HOME=$MPI_HOME \
    -D pybind11_DIR=$PWD/hoomdpy312/lib/python3.12/site-packages/pybind11/share/cmake/pybind11 \
    -D Eigen3_DIR=$PWD/hoomdpy312/share/eigen3/cmake \
    -D cereal_DIR=$PWD/hoomdpy312/lib64/cmake/cereal \
    -D CMAKE_CXX_FLAGS="$CXXFLAGS" \
    -D CMAKE_C_FLAGS="$CFLAGS"
cmake --build build/hoomd_aocc -j4
```

#### Cray MPICH

```bash
export CXXFLAGS="-Ofast -h aggress -h vector3 -hfp3"
export CFLAGS="$CXXFLAGS"

PATH=hoomdmpich/bin:$PATH \
cmake -B build/hoomd_mpich_v2 -S $PWD/hoomd-blue \
    -D ENABLE_MPI=on \
    -D pybind11_DIR=$PWD/hoomdmpich/lib/python3.12/site-packages/pybind11/share/cmake/pybind11 \
    -D Eigen3_DIR=$PWD/hoomdmpich/share/eigen3/cmake \
    -D cereal_DIR=$PWD/hoomdmpich/lib64/cmake/cereal
cmake --build build/hoomd_mpich_v2 -j4
```
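After any of the builds above, it is worth confirming that the configure options actually took effect before queueing jobs. A minimal sketch, assuming HOOMD-blue ≥ 3 (where the `hoomd.version` module exposes the build flags) and the build tree on `PYTHONPATH`; `check_build.py` is an illustrative filename:

```python
# Sketch: run with the build tree on PYTHONPATH, e.g.
#   PYTHONPATH=$PWD/build/hoomd_gcc_intelmpi python3 check_build.py
import hoomd

print(hoomd.version.version)      # HOOMD-blue version string
print(hoomd.version.mpi_enabled)  # should be True for ENABLE_MPI=on builds
print(hoomd.version.gpu_enabled)  # True only for CUDA-enabled builds
```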
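All of the runs in the next section exercise the `hoomd_benchmarks.md_pair_wca` case: the WCA potential, i.e. Lennard-Jones truncated at r = 2^(1/6) σ and shifted, so it is purely repulsive. For reference, here is a sketch of that interaction in the HOOMD-blue v4 Python API (this is not the benchmark's own code; the buffer size, timestep, and `('A', 'A')` parameters are illustrative):

```python
import hoomd

# Neighbor list for the short-ranged pair force.
cell = hoomd.md.nlist.Cell(buffer=0.4)

# WCA = LJ cut at the potential minimum, 2^(1/6)*sigma, shifted to zero there.
wca = hoomd.md.pair.LJ(nlist=cell, default_r_cut=2 ** (1 / 6), mode='shift')
wca.params[('A', 'A')] = dict(epsilon=1.0, sigma=1.0)

# The force is then attached to an MD integrator.
integrator = hoomd.md.Integrator(dt=0.005, forces=[wca])
```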
## Run scripts

- case: `hoomd_benchmarks.md_pair_wca` (sketched above)
- gpu: 2

### GSD file (N=10000)

GSD stands for "General Simulation Data"; it is the file format HOOMD-blue uses to store and transmit particle simulation data. The job below runs with `--warmup_steps 0 --benchmark_steps 0`, so it does no timed work; its purpose is to generate the initial N=10000 configuration as a GSD file.
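To see what such a file holds, or to build one by hand, a minimal sketch using the `gsd` Python package installed earlier (assuming gsd ≥ 3.0 for the short `'w'`/`'r'` mode strings; the filename and all values are illustrative):

```python
import gsd.hoomd

# Build a single frame by hand.
frame = gsd.hoomd.Frame()
frame.particles.N = 2
frame.particles.types = ['A']
frame.particles.typeid = [0, 0]
frame.particles.position = [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]]
frame.configuration.box = [10, 10, 10, 0, 0, 0]  # Lx, Ly, Lz, xy, xz, yz

# Write a one-frame trajectory, then read it back.
with gsd.hoomd.open('example.gsd', mode='w') as traj:
    traj.append(frame)

with gsd.hoomd.open('example.gsd', mode='r') as traj:
    print(len(traj), traj[0].particles.N, traj[0].configuration.box)
```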
```bash
#!/bin/bash
#PBS -N test
#PBS -q gpuvolta
#PBS -l ncpus=24
#PBS -l ngpus=2
#PBS -l mem=96gb
#PBS -l walltime=00:10:00
#PBS -P pi13
#PBS -j oe
#PBS -o output_2.log
#PBS -e error.log

module load cuda openmpi
module load gcc/12.2.0
module load llvm/16.0.4 intel-tbb/2021.13.0
source /home/552/es6155/miniconda3/bin/activate /home/552/es6155/hoomdpy312cuda

nvidia-smi

export PYTHONPATH=$PWD/build/hoomd_cuda:$PWD/hoomd-benchmarks

mpirun \
    -map-by ppr:2:node -oversubscribe \
    -x PYTHONPATH \
    /home/552/es6155/hoomdpy312cuda/bin/python3 \
    -m hoomd_benchmarks.md_pair_wca \
    --device GPU -N 10000 -v --warmup_steps 0 --benchmark_steps 0
```

### Benchmarks

The job script is the same as the one above; only the final line differs (N=1000000, 100 benchmark steps):

```bash
#!/bin/bash
#PBS -N test
#PBS -q gpuvolta
#PBS -l ncpus=24
#PBS -l ngpus=2
#PBS -l mem=96gb
#PBS -l walltime=00:10:00
#PBS -P pi13
#PBS -j oe
#PBS -o output_2.log
#PBS -e error.log

module load cuda openmpi
module load gcc/12.2.0
module load llvm/16.0.4 intel-tbb/2021.13.0
source /home/552/es6155/miniconda3/bin/activate /home/552/es6155/hoomdpy312cuda

nvidia-smi

export PYTHONPATH=$PWD/build/hoomd_cuda:$PWD/hoomd-benchmarks

mpirun \
    -map-by ppr:2:node -oversubscribe \
    -x PYTHONPATH \
    /home/552/es6155/hoomdpy312cuda/bin/python3 \
    -m hoomd_benchmarks.md_pair_wca \
    --device GPU -N 1000000 -v --benchmark_steps 100
```

![image](https://hackmd.io/_uploads/HJAjqGYiA.png)
![image](https://hackmd.io/_uploads/HJOaqGKoA.png)

### Intel

```bash
#!/bin/bash
#PBS -N test
#PBS -q normal
#PBS -l ncpus=1536
#PBS -l mem=400gb
#PBS -l walltime=00:10:00
#PBS -P pi13
#PBS -j oe
#PBS -o intel_v2_1536_2_2_c.log
#PBS -e error.log

module purge
module load intel-compiler-llvm/2024.2.0 intel-mpi/2021.13.0
source /home/552/es6155/miniconda3/bin/activate /home/552/es6155/hoomdintel

export PYTHONPATH=$PWD/build/hoomd_intel_v2:$PWD/hoomd-benchmarks
export LD_PRELOAD=/apps/intel-tools/intel-mpi/2021.13.0/lib/release/libmpi.so
export I_MPI_PIN_DOMAIN=auto
export I_MPI_PIN_ORDER=scatter
export I_MPI_PIN_PROCESSOR_LIST=allcores
export OMP_NUM_THREADS=1

numactl --interleave=all \
mpirun -n 1536 -ppn 2 \
    -genv PYTHONPATH $PYTHONPATH \
    -genv I_MPI_PIN_DOMAIN $I_MPI_PIN_DOMAIN \
    -genv I_MPI_PIN_ORDER $I_MPI_PIN_ORDER \
    -genv OMP_NUM_THREADS $OMP_NUM_THREADS \
    python3 -m hoomd_benchmarks.md_pair_wca \
    --device CPU -v \
    -N 200000 \
    --warmup_steps 10000 --benchmark_steps 320000
```

### Config UCC

```bash
./configure --prefix=$HOME/ucc/built --with-ucx=$HOME/ucx/built
```

### Config OpenMPI

```bash
module load intel-compiler-llvm
module load ucx
module load ucc

CC="icx -O3 -Wno-tautological-constant-compare -xHost -Wno-error=incompatible-function-pointer-types" \
CXX="icpx -O3 -Wno-tautological-constant-compare -xHost -Wno-error=incompatible-function-pointer-types" \
FC="ifx -O3 -fPIC -xHost" \
./configure \
    --prefix=$HOME/ompi/built \
    --enable-shared \
    --enable-mpi-fortran \
    --with-tm=/opt/pbs/default \
    --with-pbs \
    --with-ucx=$HOME/ucx/built \
    --with-libevent=internal \
    --enable-mpi1-compatibility \
    --with-knem=/opt/knem \
    --without-xpmem \
    --without-verbs \
    --enable-mca-static \
    --with-hcoll=/apps/hcoll/4.8.3220 \
    --with-lustre \
    --with-io-romio-flags=--with-file-system=lustre+ufs \
    --enable-orterun-prefix-by-default \
    --with-platform=$HOME/ompi/contrib/platform/mellanox/optimized \
    --with-ucc=$HOME/ucc/built \
    --enable-sparse-groups \
    --with-zlib
```