# OS: RHEL/CentOS 7.5 ### openmpi + ucx 專區 ### fast ucx ```bash mpirun --allow-run-as-root --mca pml ucx --mca scoll ucx --mca atomic ucx --map-by node -x UCX_NET_DEVICES=mlx4_0:1 -np 64 -H a1:16,a2:16,b1:16,b2:16 <command to application> ``` ### with GPUDirect RDMA ```bash -x UCX_TLS=rc,sm,cuda_copy,gdr_copy,cuda_ipc --mca btl_openib_want_cuda_gdr 1 ``` ### 讓 CentOS 保持在 7.5 ```bash cat > /etc/yum.repos.d/CentOS-Vault.repo <<'EOF' # C7.5.1804 [C7.5.1804-base] name=CentOS-7.5.1804 - Base baseurl=http://vault.centos.org/7.5.1804/os/$basearch/ gpgcheck=1 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7 enabled=0 [C7.5.1804-updates] name=CentOS-7.5.1804 - Updates baseurl=http://vault.centos.org/7.5.1804/updates/$basearch/ gpgcheck=1 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7 enabled=0 [C7.5.1804-extras] name=CentOS-7.5.1804 - Extras baseurl=http://vault.centos.org/7.5.1804/extras/$basearch/ gpgcheck=1 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7 enabled=0 [C7.5.1804-centosplus] name=CentOS-7.5.1804 - CentOSPlus baseurl=http://vault.centos.org/7.5.1804/centosplus/$basearch/ gpgcheck=1 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7 enabled=0 [C7.5.1804-fasttrack] name=CentOS-7.5.1804 - Fasttrack baseurl=http://vault.centos.org/7.5.1804/fasttrack/$basearch/ gpgcheck=1 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7 enabled=0 EOF ``` ```bash yum-config-manager --disable CentOS\* yum-config-manager --enable C7.5\* ``` ### install essential ```bash yum update -y && yum install -y mlocate git \ python3 \ bc tree svn gcc gcc-c++ \ vim net-tools wget make \ pciutils gtk2 atk cairo \ gcc-gfortran tcsh lsof \ nfs-utils automake autoconf \ tcl tcl-devel tk \ kernel-devel \ kernel-devel-$(uname -r) \ yum-utils \ device-mapper-persistent-data \ lvm2 \ openssl-devel \ libuuid-devel \ libseccomp-devel \ squashfs-tools \ alsa-lib \ gettext-devel \ libevent-devel \ python-devel \ flex \ libpsm2-devel.x86_64 \ gtk3 \ libXScrnSaver \ numactl-devel \ 
binutils-devel \ libudev-devel \ libxml2-devel \ xorg-x11-server-Xorg xorg-x11-xauth xorg-x11-apps \ libtool bison bison-devel \ doxygen graphviz \ bzip2 man \ libnl libmnl numactl \ libmnl tcl gcc-gfortran tk \ git \ environment-modules ``` ### 添加epel repository ``` yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm ``` ### 雜項 (hostname 要重開才會變) ```bash echo "root:0" | chpasswd echo "export LANG=en_US.UTF-8" >> ~/.bashrc systemctl stop firewalld systemctl disable firewalld hostnamectl set-hostname <hostname> mkdir /app /isc exec bash ``` ## CMake ```bash cd /tmp wget https://cmake.org/files/v3.11/cmake-3.11.0.tar.gz tar vxf cmake-3.11.0.tar.gz -C /app cd /app/cmake-3.11.0 ./bootstrap --parallel=$(nproc) make -j$(nproc) make install -j$(nproc) ``` ## infiniband ### ofed driver ```bash cd /tmp wget http://content.mellanox.com/ofed/MLNX_OFED-4.4-2.0.7.0/MLNX_OFED_LINUX-4.4-2.0.7.0-rhel7.5-x86_64.tgz tar vxf MLNX_OFED_LINUX-4.4-2.0.7.0-rhel7.5-x86_64.tgz -C /app cd /app/MLNX_OFED_LINUX-4.4-2.0.7.0-rhel7.5-x86_64 ./mlnxofedinstall --all --force /etc/init.d/openibd restart ``` ### install hpcx ```bash cd /tmp wget http://content.mellanox.com/hpc/hpc-x/v2.4/hpcx-v2.4.0-gcc-MLNX_OFED_LINUX-4.4-2.0.7.0-redhat7.5-x86_64.tbz tar xvf hpcx-v2.4.0-gcc-MLNX_OFED_LINUX-4.4-2.0.7.0-redhat7.5-x86_64.tbz -C /app mv /app/hpcx-v2.4.0-gcc-MLNX_OFED_LINUX-4.4-2.0.7.0-redhat7.5-x86_64 /app/hpcx ``` ### rebuild openmpi with intel compiler in hpcx ```bash cd /app/hpcx mv ompi _ompi export HPCX_HOME=$PWD cd sources tar xpf ${HPCX_HOME}/sources/openmpi-gitclone.tar.gz cd openmpi-gitclone module load icc ./configure CC=icc CXX=icpc F77=ifort FC=ifort --prefix=${HPCX_HOME}/ompi \ --with-hcoll=${HPCX_HOME}/hcoll --with-ucx=${HPCX_HOME}/ucx \ --with-platform=contrib/platform/mellanox/optimized --with-tm=/app/pbs \ --with-cuda=/app/cuda10.1 2>&1 | tee config-icc-output.log make all -j$(nproc) 2>&1 | tee build_icc.log && \ make -j$(nproc) install 2>&1 | tee install_icc.log 
``` #### 如果有 cuda ```bash --with-cuda=<cuda dir> ``` #### 如果有 pbs ```bash --with-tm=<pbs dir> ``` ### hpcx modulefile ```bash mkdir /usr/share/Modules/modulefiles/hpcx ln -s /app/hpcx/modulefiles/hpcx /usr/share/Modules/modulefiles/hpcx/2.4.0 ``` ### network-ib0 ```bash vim /etc/sysconfig/network-scripts/ifcfg-ib0 ``` ```bash CONNECTED_MODE=no TYPE=InfiniBand PROXY_METHOD=none BROWSER_ONLY=no BOOTPROTO=static DEFROUTE=yes IPV4_FAILURE_FATAL=no IPV6INIT=yes IPV6_AUTOCONF=yes IPV6_DEFROUTE=yes IPV6_FAILURE_FATAL=no IPV6_ADDR_GEN_MODE=stable-privacy NAME=ib0 DEVICE=ib0 ONBOOT=yes IPADDR=10.0.0.X NETMASK=255.255.255.0 ``` ```bash systemctl restart network ``` ## ib sm (在其中一台做就可以) ```bash echo "Default=0xffff, ipoib, mtu=5, defmember=both: ALL=both;" > /etc/opensm/partitions.conf systemctl restart opensmd systemctl enable opensmd ``` ### GCC 5.1.0, 9.3.0 ## GCC-$version ```bash export version=自己挑 cd /tmp wget https://ftp.gnu.org/gnu/gcc/gcc-${version}/gcc-${version}.tar.gz tar vxf gcc-${version}.tar.gz -C /app cd /app/gcc-${version} ./contrib/download_prerequisites mkdir build cd build ../configure --enable-checking=release \ --enable-languages=c,c++,fortran --disable-multilib \ --prefix=/usr/local/gcc-${version} make -j$(nproc) make install -j$(nproc) mkdir /usr/share/Modules/modulefiles/gcc cat > /usr/share/Modules/modulefiles/gcc/${version} <<EOF #%Module set TOP_DIR /usr/local/gcc-${version} prepend-path PATH \${TOP_DIR}/bin prepend-path LD_LIBRARY_PATH \${TOP_DIR}/lib64 EOF ``` ## Intel :::warning 與下面 psxe_runtime 擇一安裝 ::: ### Intel psxe 2019.5 #### obtain psxe ```bash cd /tmp wget http://registrationcenter-download.intel.com/akdlm/irc_nas/tec/15809/parallel_studio_xe_2019_update5_cluster_edition_online.tgz tar xvf parallel_studio_xe_2019_update5_cluster_edition_online.tgz -C /app ``` #### configure silent.cfg ```bash cd /app/parallel_studio_xe_2019_update5_cluster_edition_online sed -i 's/ACCEPT_EULA=decline/ACCEPT_EULA=accept/' silent.cfg sed -i 
's/#ACTIVATION_SERIAL_NUMBER=snpat/ACTIVATION_SERIAL_NUMBER=S477-Z28VJXXB/' silent.cfg sed -i 's/ACTIVATION_TYPE=exist_lic/ACTIVATION_TYPE=serial_number/' silent.cfg ``` #### install psxe ``` ./install.sh -s silent.cfg ``` #### 小調整 (只能做一次) ```bash sed -i '133d;134d;135d;136d;137d' /opt/intel/compilers_and_libraries_2019.5.281/linux/bin/compilervars.sh ``` #### psxe module file ```bash cd /app git clone https://git.code.sf.net/p/env2/code env2-code mkdir /usr/share/Modules/modulefiles/icc mkdir /usr/share/Modules/modulefiles/mkl mkdir /usr/share/Modules/modulefiles/impi echo "#%Module" > /usr/share/Modules/modulefiles/icc/2019.5.281 perl /app/env2-code/env2 -from bash -to modulecmd \ "/opt/intel/compilers_and_libraries_2019.5.281/linux/bin/compilervars.sh intel64" >> /usr/share/Modules/modulefiles/icc/2019.5.281 echo "#%Module" > /usr/share/Modules/modulefiles/mkl/2019.5.281 perl /app/env2-code/env2 -from bash -to modulecmd \ "/opt/intel/compilers_and_libraries_2019.5.281/linux/mkl/bin/mklvars.sh intel64" >> /usr/share/Modules/modulefiles/mkl/2019.5.281 echo "#%Module" > /usr/share/Modules/modulefiles/impi/2019.5.281 perl /app/env2-code/env2 -from bash -to modulecmd \ "/opt/intel/compilers_and_libraries_2019.5.281/linux/mpi/intel64/bin/mpivars.sh" >> /usr/share/Modules/modulefiles/impi/2019.5.281 ``` ### Intel psxe 2018.4 #### obtain psxe ```bash cd /tmp wget http://registrationcenter-download.intel.com/akdlm/irc_nas/tec/13717/parallel_studio_xe_2018_update4_cluster_edition_online.tgz tar xvf parallel_studio_xe_2018_update4_cluster_edition_online.tgz -C /app ``` #### configure silent.cfg ```bash cd /app/parallel_studio_xe_2018_update4_cluster_edition_online sed -i 's/ACCEPT_EULA=decline/ACCEPT_EULA=accept/' silent.cfg sed -i 's/#ACTIVATION_SERIAL_NUMBER=snpat/ACTIVATION_SERIAL_NUMBER=S477-Z28VJXXB/' silent.cfg sed -i 's/ACTIVATION_TYPE=exist_lic/ACTIVATION_TYPE=serial_number/' silent.cfg ``` #### install psxe ``` ./install.sh -s silent.cfg ``` #### 小調整 (只能做一次) 
```bash sed -i '121d;122d;123d;124d' /opt/intel/compilers_and_libraries_2018.5.274/linux/bin/compilervars.sh ``` #### psxe module file ```bash echo "#%Module" > /usr/share/Modules/modulefiles/icc/2018.5.274 perl /app/env2-code/env2 -from bash -to modulecmd \ "/opt/intel/compilers_and_libraries_2018.5.274/linux/bin/compilervars.sh intel64" >> /usr/share/Modules/modulefiles/icc/2018.5.274 echo "#%Module" > /usr/share/Modules/modulefiles/mkl/2018.5.274 perl /app/env2-code/env2 -from bash -to modulecmd \ "/opt/intel/compilers_and_libraries_2018.5.274/linux/mkl/bin/mklvars.sh intel64" >> /usr/share/Modules/modulefiles/mkl/2018.5.274 echo "#%Module" > /usr/share/Modules/modulefiles/impi/2018.5.274 perl /app/env2-code/env2 -from bash -to modulecmd \ "/opt/intel/compilers_and_libraries_2018.5.274/linux/mpi/intel64/bin/mpivars.sh" >> /usr/share/Modules/modulefiles/impi/2018.5.274 ``` ## Intel Runtime: 只 for 跑程式 :::warning 與上面 psxe 擇一安裝 ::: ### obtain psxe_runtime repo ``` yum-config-manager --add-repo https://yum.repos.intel.com/2018/setup/intel-psxe-runtime-2018.repo rpm --import https://yum.repos.intel.com/2018/setup/RPM-GPG-KEY-intel-psxe-runtime-2018 ``` ### install components (可自選) - 從<a href="https://software.intel.com/en-us/articles/installing-intel-parallel-studio-xe-runtime-2020-using-yum-repository">這裡</a>看有什麼可以選 ``` yum install -y intel-icc-runtime-64bit-2018.4-274.x86_64 \ intel-ifort-runtime-64bit-2018.4-274.x86_64 \ intel-mkl-runtime-64bit-2018.4-274.x86_64 \ intel-mpi-runtime-64bit-2018.4-274.x86_64 ``` ### env variables ``` source /opt/intel/psxe_runtime/linux/bin/psxevars.sh ``` ## hostfile ```bash vim /etc/hosts 127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4 ::1 localhost localhost.localdomain localhost6 localhost6.localdomain6 10.0.0.1 a1 10.0.0.2 a2 10.0.0.3 b1 10.0.0.4 b2 ``` ## 互串 ```bash ssh-keygen -t rsa -N '' -f 
~/.ssh/id_rsa ssh-copy-id root@a1 ssh-copy-id root@a2 ssh-copy-id root@b1 ssh-copy-id root@b2 ``` ### 設定ssh捨棄knownhost ``` cat > ~/.ssh/config <<EOF StrictHostKeyChecking=no UserKnownHostsFile=/dev/null EOF ``` ## GPU ### blacklist nouveau ```bash vim /etc/modprobe.d/blacklist-nouveau.conf blacklist nouveau options nouveau modeset=0 dracut --force ``` ### CUDA 10.1 ```bash cd /tmp wget https://developer.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda_10.1.105_418.39_linux.run sh cuda_10.1.105_418.39_linux.run ** 然後就開始 GUI 操作嚕 ** ``` ### CUDA 10.2 ```bash wget http://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda-repo-rhel7-10-2-local-10.2.89-440.33.01-1.0-1.x86_64.rpm rpm -ivh cuda-repo-rhel7-10-2-local-10.2.89-440.33.01-1.0-1.x86_64.rpm yum clean all yum install -y dkms yum install -y nvidia-driver-latest-dkms cuda yum install -y cuda-drivers ``` ### CUDA 10.0 ```bash wget -O cuda_10.0.130_410.48_linux.run https://developer.nvidia.com/compute/cuda/10.0/Prod/local_installers/cuda_10.0.130_410.48_linux sh cuda_10.0.130_410.48_linux.run \ --silent --toolkit --verbose ``` ### modulefile ```bash mkdir /usr/share/Modules/modulefiles/cuda vim /usr/share/Modules/modulefiles/cuda/10.2 #%Module conflict cuda set CUDA_PATH /usr/local/cuda-10.2 setenv CUDA_DIR ${CUDA_PATH} prepend-path PATH ${CUDA_PATH}/bin prepend-path LD_LIBRARY_PATH ${CUDA_PATH}/lib64 prepend-path CPATH ${CUDA_PATH}/include vim /usr/share/Modules/modulefiles/cuda/10.0 #%Module conflict cuda set CUDA_PATH /usr/local/cuda-10.0 setenv CUDA_DIR ${CUDA_PATH} prepend-path PATH ${CUDA_PATH}/bin prepend-path LD_LIBRARY_PATH ${CUDA_PATH}/lib64 prepend-path CPATH ${CUDA_PATH}/include ``` ### env variable(for openmpi) ```bash echo "module load cuda/10.0" >> ~/.bashrc ``` ### reboot ```bash reboot ``` ## mpich (option) ### obtain mpich ``` wget http://www.mpich.org/static/downloads/3.3.2/mpich-3.3.2.tar.gz ``` ### install mpich ``` tar xvf mpich-3.3.2.tar.gz cd mpich-3.3.2 
./configure --prefix=/app/mpich make -j16 make install -j16 ``` ### create mpich modulefile ``` mkdir /usr/share/Modules/modulefiles/mpich vim /usr/share/Modules/modulefiles/mpich/3.3.2 #%Module conflict intel set TOP_DIR /app/mpich prepend-path PATH ${TOP_DIR}/bin prepend-path LD_LIBRARY_PATH ${TOP_DIR}/lib prepend-path MANPATH ${TOP_DIR}/share/man ``` ## openmpi (option) ### install openmpi ```bash cd /tmp wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.5.tar.gz tar xvf openmpi-3.1.5.tar.gz -C /app cd /app/openmpi-3.1.5 module purge module load cuda/10.0 ./configure --with-cuda --prefix=/app/openmpi-3.1.5 make -j4 make install -j4 ``` ### create openmpi modulefile ```bash mkdir /usr/share/Modules/modulefiles/openmpi vim /usr/share/Modules/modulefiles/openmpi/3.1.5 #%Module conflict intel set TOP_DIR /app/openmpi-3.1.5 prepend-path PATH ${TOP_DIR}/bin prepend-path LD_LIBRARY_PATH ${TOP_DIR}/lib prepend-path MANPATH ${TOP_DIR}/share/man ``` ## HPL <img src="https://i.imgur.com/NJh8F1B.jpg" width=50px> ### obtain hpl-2.0_FERMI_v15 ```bash wget -O hpl-2.0_FERMI_v15.tar 34.80.90.52:8090/upload/2019/10/hpl-2.0_FERMI_v15-d12aaaa9505945cdbfb1ee1c2d5d8a32.tar tar xvf hpl-2.0_FERMI_v15.tar -C /app ``` ### configure Make.CUDA ```bash cd /app/hpl-2.0_FERMI_v15 vim Make.CUDA TOPdir = /app/hpl-2.0_FERMI_v15 MPdir = /app/intel/compilers_and_libraries_2020.0.166/linux/mpi/intel64 MPinc = -I$(MPdir)/include MPlib = $(MPdir)/lib/release/libmpi.so LAdir = /app/intel/compilers_and_libraries_2020.0.166/linux/mkl/lib/intel64 LAinc = -I$(LAdir)/../../include CC = mpiicc CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall -fopenmp -std=c99 LINKER = $(CC) ``` ### make ```bash module purge module load intel cuda/10.0 make arch=CUDA -j4 ``` ### HPL.dat ```bash cd bin/CUDA vim HPL.dat ``` HPL.dat : ```bash HPLinpack benchmark input file Innovative Computing Laboratory, University of Tennessee HPL.out output file name (if any) 6 device out 
(6=stdout,7=stderr,file) 1 # of problems sizes (N) 50000 Ns 1 # of NBs 256 NBs 0 PMAP process mapping (0=Row-,1=Column-major) 1 # of process grids (P x Q) 1 Ps 2 Qs 16.0 threshold 1 # of panel fact 0 1 2 PFACTs (0=left, 1=Crout, 2=Right) 1 # of recursive stopping criterium 2 8 NBMINs (>= 1) 1 # of panels in recursion 2 NDIVs 1 # of recursive panel fact. 0 1 2 RFACTs (0=left, 1=Crout, 2=Right) 1 # of broadcast 0 2 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) 1 # of lookahead depth 1 0 DEPTHs (>=0) 1 SWAP (0=bin-exch,1=long,2=mix) 192 swapping threshold 1 L1 in (0=transposed,1=no-transposed) form 1 U in (0=transposed,1=no-transposed) form 1 Equilibration (0=no,1=yes) 8 memory alignment in double (> 0) ``` ### run_linpack ```bash vim run_linpack HPL_DIR=/app/hpl-2.0_FERMI_v15 mpirun -np 2 -ppn 1 -f ~/hosts $HPL_DIR/bin/CUDA/xhpl ``` ### run hpl ```bash module purge module load intel cuda/10.0 ./run_linpack ``` ## HPCG <img src="https://i.imgur.com/NJh8F1B.jpg" width=50px> ### obtain HPCG ```bash wget http://www.hpcg-benchmark.org/downloads/hpcg-3.1_cuda-10_ompi-3.1_gcc485_sm_35_sm_50_sm_60_sm_70_sm75_ver_10_9_18.tgz tar vxf hpcg-3.1_cuda-10_ompi-3.1_gcc485_sm_35_sm_50_sm_60_sm_70_sm75_ver_10_9_18.tgz -C /app ``` ### hpcg.dat ```bash cd /app/hpcg-3.1_cuda-10_ompi-3.1_gcc485_sm_35_sm_50_sm_60_sm_70_sm75_ver_10_9_18 vim hpcg.dat #調整參數 ``` ### run HPCG ```bash module purge module load openmpi cuda/10.0 mpirun --allow-run-as-root --prefix /app/openmpi -np 2 --host ib1,ib2 ./xhpcg-3.1_gcc_485_cuda-10.0.130_ompi-3.1.0_sm_35_sm_50_sm_60_sm_70_sm_75_ver_10_9_18 ``` ## HPCC <img src="https://i.imgur.com/ntqP33x.png" width=50px> ### obtain hpcc-1.5.0 ```bash wget http://icl.cs.utk.edu/projectsfiles/hpcc/download/hpcc-1.5.0.tar.gz tar xvf hpcc-1.5.0.tar.gz -C /app ``` ### configure Make.Linux_ATHLON_CBLAS ```bash cd /app/hpcc-1.5.0/hpl cp setup/Make.Linux_ATHLON_CBLAS . 
vim Make.Linux_ATHLON_CBLAS TOPdir = /app/hpcc-1.5.0/hpl MPdir = /app/intel/compilers_and_libraries_2020.0.166/linux/mpi/intel64 MPinc = -I$(MPdir)/include MPlib = $(MPdir)/lib/release/libmpi.so LAdir = /app/intel/compilers_and_libraries_2020.0.166/linux/mkl/lib/intel64 LAinc = -I$(LAdir)/../../include LAlib = -L$(LAdir) -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 CC = mpiicc CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall -std=c99 ``` ### make ```bash cd .. module purge module load intel make arch=Linux_ATHLON_CBLAS -j4 ``` ### run hpcc ```bash cp _hpccinf.txt hpccinf.txt vim hpccinf.txt #調整 P, Q, NB.... mpirun -np 8 -f ~/hosts ./hpcc ``` ## Elmer/Ice <img src="https://i.imgur.com/ntqP33x.png" width=50px> ### obtain elmer ```bash git clone https://www.github.com/ElmerCSC/elmerfem ``` ### make elmer builddir ```bash cd elmerfem mkdir builddir cd builddir ``` ### create build script ```bash vim build.sh ``` <p>build.sh :</p> ```bash #!/bin/bash CMAKE=cmake # Installation directory TIMESTAMP=$(date +"%m-%d-%y") ELMER_REV="Elmer_devel_${TIMESTAMP}" ELMERSRC="/root/elmerfem" BUILDDIR="$ELMERSRC/builddir" IDIR="/usr/local/$ELMER_REV" cd ${BUILDDIR} module purge module load intel/2019.10.24 $CMAKE $ELMERSRC \ -DCMAKE_INSTALL_PREFIX=$IDIR \ -DCMAKE_C_COMPILER=mpiicc \ -DCMAKE_CXX_COMPILER=mpiicpc \ -DCMAKE_Fortran_COMPILER=mpiifort \ -DWITH_MPI:BOOL=TRUE \ -DWITH_ElmerIce:BOOL=TRUE make -j4 && make install rm /usr/local/Elmer-devel ln -s $IDIR /usr/local/Elmer-devel ``` ### build elmer/ice ```bash source build.sh ``` ### modulefile ```bash mkdir /usr/share/Modules/modulefiles/elmer vim /usr/share/Modules/modulefiles/elmer/1.0 #%Module prepend-path PATH /usr/local/Elmer-devel/bin ``` ### obtain LID3D ```bash cd wget https://hpcadvisorycouncil.atlassian.net/wiki/download/attachments/1169784836/LID3D.tgz tar xvf LID3D.tgz ``` ### test LID3D_20k with 8 process ```bash cd LID3D module purge module load intel module load elmer ElmerGrid 2 2 
LID3D_extrude_20k/ -metiskway 8 mpirun -np 8 -f ~/hosts ElmerSolver ``` ## NFS ### **Type 1: NFS over Ethernet** ``` mkdir /nfs chmod 777 /nfs ``` #### server端 (n11) ``` vim /etc/exports /nfs *(rw,sync,no_root_squash,no_all_squash) ``` #### client端 ``` mount -t nfs n11:/nfs /nfs ``` ### **Type 2: NFS over RDMA** ``` mkdir /nfs-rdma chmod 777 /nfs-rdma ``` #### server端 (IB1) ``` vim /etc/rdma/rdma.conf SVCRDMA_LOAD=yes vim /etc/sysconfig/nfs RPCNFSDARGS="--rdma=20049" echo rdma 20049 > /proc/fs/nfsd/portlist vi /etc/exports /nfs-rdma *(rw,sync,no_root_squash,no_all_squash) ``` #### client端 ``` vim /etc/rdma/rdma.conf XPRTRDMA_LOAD=yes mount -t nfs -o proto=rdma,port=20049 IB1:/nfs-rdma /nfs-rdma ``` ### **Type 3: NFS over ZFS over Ethernet** #### 磁區設定 ##### 檢查/dev/sda4 ``` parted -l ``` ##### 若/dev/sda4的Type為extended ``` fdisk /dev/sda m(查看命令) d(delete) 4 n(new) p(primary) enter enter w(write) reboot ``` #### install zfs ##### CentOS ``` yum install -y epel-release yum --enablerepo=epel install -y dkms yum install -y http://download.zfsonlinux.org/epel/zfs-release.el7_7.noarch.rpm yum install -y zfs yum remove -y zfs zfs-kmod spl spl-kmod libzfs2 libnvpair1 libuutil1 libzpool2 zfs-release yum install -y http://download.zfsonlinux.org/epel/zfs-release.el7_7.noarch.rpm yum autoremove -y yum clean metadata yum install -y zfs ``` #### configure zfs ##### module ``` modprobe zfs ``` ##### create pool ``` zpool create -f zdisk1 /dev/sda4 ``` ##### 檢查zfs的service, 確保為enable ``` systemctl status zfs.target ``` #### 卸載module ``` module unload <name> ``` #### 查看MODULEPATH上的module ``` module avail ``` #### 查看loaded module ``` module list ``` ## 有用軟體 ### tshark #### install tshark ``` yum install -y wireshark ``` #### run tshark ``` tshark -i <interface> -w <output-file> ``` #### non-root user ##### root端設定 ``` usermod -aG wireshark t1 setcap cap_net_raw,cap_net_admin=eip /usr/sbin/dumpcap ``` ### ibdump ``` wget 
https://linux.mellanox.com/public/repo/mlnx_ofed/4.7-1.0.0.1/rhel7.7/x86_64/MLNX_LIBS/ibdump-5.0.0-3.47100.x86_64.rpm rpm -iv ibdump-5.0.0-3.47100.x86_64.rpm ``` ### iperf3 ``` yum install -y iperf3 ``` ### iftop ``` yum -y install libpcap libpcap-devel ncurses ncurses-devel yum install -y iperf3 wget http://www.ex-parrot.com/pdw/iftop/download/iftop-0.17.tar.gz tar -zxvf iftop-0.17.tar.gz cd iftop-0.17 ./configure make make install ``` ### hamachi #### install ``` wget https://www.vpn.net/installers/logmein-hamachi-2.1.0.203-x64.tgz tar xvf logmein-hamachi-2.1.0.203-x64.tgz cd logmein-hamachi-2.1.0.203-x64 ./install.sh ``` #### login ``` hamachi login ``` #### create ``` hamachi create <group id> ``` ### join ``` hamachi join <group id> ``` ## 各種查看指令 ### 版本 #### linux核心版本 ``` uname -r ``` #### linux distribution ``` cat /etc/redhat-release ``` ### 磁區 #### 所有mount點 ``` df -h ``` #### partition相關 ``` fdisk -l parted -l ``` #### 磁區關聯 ``` lsblk ``` ### 套件 (以gcc為例) #### 已安裝 ##### rpm ``` rpm -qa | grep gcc ``` ##### yum ``` yum list installed | grep gcc ``` #### 未安裝 ``` yum search gcc ``` #### 未安裝+版本 ``` yum --showduplicates list gcc ``` ## AOCC (option) ### install ```bash wget https://developer.amd.com/aocc-compiler-eula/aocc-compiler-2.1.0.tar tar vxf aocc-compiler-2.1.0.tar cd aocc-compiler-2.1.0 ./install.sh mkdir /usr/local/Modules/modulefiles/aocc cp /root/aocc-compiler-2.1.0_module /usr/local/Modules/modulefiles/aocc/2.1.0 ``` ## flame (option) ```bash git clone https://github.com/amd/libflame.git cd libflame ./configure --enable-dynamic-build --enable-builtin-blas \ --enable-lapack2flame --enable-supermatrix \ --enable-multithreading=openmp --enable-gpu \ --enable-cblas-interfaces --prefix=/usr/local/libflame make -j16 make install ``` ## 把系統維持在舊版本的方法 (CentOS 6.9) ```bash yum-config-manager --disable CentOS\* ``` ```bash vi /etc/yum.repos.d/CentOS-Vault.repo ``` 加入以下行數到最後 ```bash [C6.9-base] name=CentOS-6.9 - Base 
baseurl=http://vault.centos.org/6.9/os/$basearch/ gpgcheck=1 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6 enabled=0 [C6.9-updates] name=CentOS-6.9 - Updates baseurl=http://vault.centos.org/6.9/updates/$basearch/ gpgcheck=1 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6 enabled=0 [C6.9-extras] name=CentOS-6.9 - Extras baseurl=http://vault.centos.org/6.9/extras/$basearch/ gpgcheck=1 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6 enabled=0 [C6.9-contrib] name=CentOS-6.9 - Contrib baseurl=http://vault.centos.org/6.9/contrib/$basearch/ gpgcheck=1 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6 enabled=0 [C6.9-centosplus] name=CentOS-6.9 - CentOSPlus baseurl=http://vault.centos.org/6.9/centosplus/$basearch/ gpgcheck=1 gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6 enabled=0 ``` ```bash yum-config-manager --enable C6.9\* ``` ## 手動分磁區 (限 LVM) ### 把 home 變 50G ```bash yum install -y gdisk umount /dev/mapper/VolGroup-lv_home e2fsck -f /dev/mapper/VolGroup-lv_home resize2fs /dev/mapper/VolGroup-lv_home 50G mount /dev/mapper/VolGroup-lv_home /home lvreduce -L 50G /dev/mapper/VolGroup-lv_home ``` ### 把 root 變 1.6T ```bash lvextend -L 1.6T /dev/VolGroup/lv_root resize2fs /dev/VolGroup/lv_root ``` ## 手動編 environment module ### installation ```bash cd /tmp wget https://github.com/cea-hpc/modules/releases/download/v4.4.1/modules-4.4.1.tar.gz tar xvf modules-4.4.1.tar.gz -C /opt cd /opt/modules-4.4.1 ./configure --prefix=/usr/share/Modules make -j16 make install -j16 echo ". /usr/share/Modules/init/bash" > /etc/profile.d/module.sh exec bash ```