# H100
## miniforge
```
wget https://github.com/conda-forge/miniforge/releases/download/24.9.0-0/Miniforge3-24.9.0-0-Linux-x86_64.sh
chmod a+x Miniforge3-24.9.0-0-Linux-x86_64.sh
./Miniforge3-24.9.0-0-Linux-x86_64.sh
```
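After the installer finishes, the shell needs to pick up the new environment; a minimal sketch, assuming the default install prefix `~/miniforge3`:
```
# Activate the base environment without relying on shell init hooks
source ~/miniforge3/bin/activate
mamba --version
```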
## cuquantum
```
wget https://developer.download.nvidia.com/compute/cuquantum/redist/cuquantum/linux-x86_64/cuquantum-linux-x86_64-24.08.0.5_cuda12-archive.tar.xz
tar -Jxv -f cuquantum-linux-x86_64-24.08.0.5_cuda12-archive.tar.xz
mv cuquantum-linux-x86_64-24.08.0.5_cuda12-archive /home/p00acy00/libraries
```
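To make the extracted archive visible to later builds, the usual approach is to export `CUQUANTUM_ROOT` and extend the library path; a sketch, assuming the `mv` above placed the archive inside `/home/p00acy00/libraries`:
```
# Hypothetical environment setup for the extracted cuQuantum archive
export CUQUANTUM_ROOT=/home/p00acy00/libraries/cuquantum-linux-x86_64-24.08.0.5_cuda12-archive
export LD_LIBRARY_PATH=$CUQUANTUM_ROOT/lib:$LD_LIBRARY_PATH
```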
## NVHPC
A self-modified nvhpc-hpcx-cuda12 modulefile:
```
module use /home/p00acy00/modulefiles/
module load nvhpc-hpcx-cuda12/24.7
```
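A quick check that the compilers and the bundled HPC-X MPI are on `PATH` after loading:
```
nvc --version        # NVHPC C compiler
which mpicc mpirun   # should resolve into the hpcx-2.19 ompi/bin directory
nvcc --version       # CUDA toolkit bundled with the SDK
```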
The modulefile at /home/p00acy00/modulefiles/nvhpc-hpcx-cuda12/24.7:
```
#%Module1.0
# Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
conflict nvhpc
conflict nvhpc-nompi
conflict nvhpc-byo-compiler
conflict nvhpc-hpcx
conflict nvhpc-openmpi3
set nvhome /work/HPC_software/LMOD/nvidia/packages/hpc_sdk-24.7
set target Linux_x86_64
set version 24.7
set cudaver 12.5
set nvcudadir $nvhome/$target/$version/cuda
set nvcompdir $nvhome/$target/$version/compilers
set nvmathdir $nvhome/$target/$version/math_libs
set nvcommdir $nvhome/$target/$version/comm_libs
setenv NVHPC $nvhome
setenv NVHPC_ROOT $nvhome/$target/$version
setenv CC $nvcompdir/bin/nvc
setenv CXX $nvcompdir/bin/nvc++
setenv FC $nvcompdir/bin/nvfortran
setenv F90 $nvcompdir/bin/nvfortran
setenv F77 $nvcompdir/bin/nvfortran
setenv CPP cpp
setenv CUDA_HOME $nvcudadir
setenv CUDA_PATH $nvcudadir
prepend-path PATH $nvcudadir/bin
prepend-path PATH $nvcompdir/bin
prepend-path PATH $nvcompdir/extras/qd/bin
prepend-path PATH $nvcommdir/$cudaver/hpcx/hpcx-2.19/ompi/bin
prepend-path LD_LIBRARY_PATH $nvcudadir/lib64
prepend-path LD_LIBRARY_PATH $nvcudadir/extras/CUPTI/lib64
prepend-path LD_LIBRARY_PATH $nvcompdir/extras/qd/lib
prepend-path LD_LIBRARY_PATH $nvcompdir/lib
prepend-path LD_LIBRARY_PATH $nvmathdir/lib64
prepend-path LD_LIBRARY_PATH $nvcommdir/nccl/lib
prepend-path LD_LIBRARY_PATH $nvcommdir/nvshmem/lib
prepend-path LD_LIBRARY_PATH $nvcommdir/$cudaver/hpcx/hpcx-2.19/ompi/lib
prepend-path CPATH $nvmathdir/include
prepend-path CPATH $nvcommdir/nccl/include
prepend-path CPATH $nvcommdir/nvshmem/include
prepend-path CPATH $nvcompdir/extras/qd/include/qd
prepend-path CPATH $nvcudadir/include
prepend-path CPATH $nvcommdir/$cudaver/hpcx/hpcx-2.19/ompi/include
set hpcxmoddir $nvcommdir/$cudaver/hpcx/latest/modulefiles
if { [ module-info mode load ] } {
module use $hpcxmoddir
module load hpcx
}
```
nvc and nvcc are not gcc, so plenty of breakage is to be expected; for example, installing mpi4py with pip fails:
```
/work/HPC_software/LMOD/nvidia/packages/hpc_sdk-24.7/Linux_x86_64/24.7/comm_libs/12.5/hpcx/hpcx-2.19/ompi/bin/mpicc -fno-strict-overflow -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /home/p00acy00/apps/miniforge3/envs/cuquantum-qiskit-pip/include -fPIC -O2 -isystem /home/p00acy00/apps/miniforge3/envs/cuquantum-qiskit-pip/include -fPIC -I/home/p00acy00/apps/miniforge3/envs/cuquantum-qiskit-pip/include/python3.12 -c _configtest.c -o _configtest.o
nvc-Error-Unknown switch: -fno-strict-overflow
failure.
```
Use with discretion.
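One common workaround is to point Open MPI's compiler wrappers at gcc via the `OMPI_CC` family of environment variables while pip builds mpi4py; a sketch:
```
# Override the underlying compiler used by mpicc/mpicxx for this build only
env OMPI_CC=gcc OMPI_CXX=g++ pip install --no-cache-dir mpi4py
```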
## Check if UCX was built with CUDA support
```
[p00acy00@lgn01 test]$ ucx_info -v
# Library version: 1.16.0
# Library path: /usr/lib64/libucs.so.0
# API headers version: 1.16.0
# Git branch '', revision a73e3f3
# Configured with: --build=x86_64-redhat-linux-gnu --host=x86_64-redhat-linux-gnu --program-prefix= --disable-dependency-tracking --prefix=/usr --exec-prefix=/usr --bindir=/usr/bin --sbindir=/usr/sbin --sysconfdir=/etc --datadir=/usr/share --includedir=/usr/include --libdir=/usr/lib64 --libexecdir=/usr/libexec --localstatedir=/var --sharedstatedir=/var/lib --mandir=/usr/share/man --infodir=/usr/share/info --disable-optimizations --disable-logging --disable-debug --disable-assertions --enable-mt --disable-params-check --without-go --without-java --enable-cma --with-cuda --with-gdrcopy --with-verbs --with-knem --with-rdmacm --without-rocm --with-xpmem --without-fuse3 --without-ugni --with-cuda=/usr/local/cuda-12.2
```
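`ucx_info -v` only shows the configure flags; listing the transports confirms the CUDA ones were actually built:
```
# cuda_copy / cuda_ipc / gdr_copy should appear if CUDA support is functional
ucx_info -d | grep -i cuda
```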
## Use ompi_info to verify CUDA support in Open MPI
```
[p00acy00@lgn01 test]$ ompi_info | grep "MPI extensions"
MPI extensions: affinity, cuda, pcollreq
[p00acy00@lgn01 test]$ ompi_info --parsable --all | grep mpi_built_with_cuda_support:value
mca:mpi:base:param:mpi_built_with_cuda_support:value:true
```
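Beyond build flags, a runtime smoke test is worth doing; a minimal sketch, assuming an environment with mpi4py installed (see below):
```
# Two ranks on one node; each prints its rank if MPI init works end to end
mpirun -np 2 python -c 'from mpi4py import MPI; c = MPI.COMM_WORLD; print(c.rank, "of", c.size)'
```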
## mamba env
> mamba install <package_name>=<version>=<build_string>
```
mamba create -n cuquantum-qiskit -c conda-forge python=3.12 cuquantum cuquantum-python cuda-version=12.5 qiskit=1.2.4=py312h12e396e_0 qiskit-aer=0.15.1=cuda125_py312hde3b351_0 qiskit-ibm-runtime jupyter matplotlib pandas mpi4py openmpi=4.*.*=external_*
mamba create -n cuquantum-qiskit -c conda-forge python=3.12 cuquantum cuquantum-python cuda-version=12.5 qiskit jupyter matplotlib pandas scikit-learn mpi4py openmpi=4.*.*=external_*
```
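Assuming the solve succeeds, activating the env and importing cuquantum confirms which build landed:
```
mamba activate cuquantum-qiskit
python -c "import cuquantum; print(cuquantum.__version__)"
```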
However, adding mpi4py with `openmpi=4.*.*=external_*` forces the solver to swap qiskit-aer to a CPU build and downgrade the cuquantum stack:
```
Package Version Build Channel Size
────────────────────────────────────────────────────────────────────────────────
Install:
────────────────────────────────────────────────────────────────────────────────
+ mpi 1.0 openmpi conda-forge 4kB
+ openmpi 4.1.6 external_1 conda-forge 12kB
+ mpi4py 3.1.6 py312h67a4414_0 conda-forge 568kB
Change:
────────────────────────────────────────────────────────────────────────────────
- qiskit-aer 0.15.1 cuda125_py312hde3b351_0 conda-forge Cached
+ qiskit-aer 0.15.1 cpu_py312h880e61a_200 conda-forge 2MB
Downgrade:
────────────────────────────────────────────────────────────────────────────────
- cutensornet 2.5.0 nompi_hbc370b7_105 conda-forge Cached
+ cutensornet 2.4.0 mpi_openmpi_h10b603f_3 conda-forge 2MB
- cuquantum-python 24.08.0 py312hee583db_5 conda-forge Cached
+ cuquantum-python 24.03.0 py312h32b3722_3 conda-forge 615kB
Summary:
Install: 3 packages
Change: 1 packages
Downgrade: 2 packages
```
The conda-forge build qiskit-aer 0.15.1 cuda125_py312hde3b351_0
requires `cutensornet >=2.5.0` with a `nompi_*` build string,
so qiskit-aer can have MPI or CUDA, but not both.
[https://conda-forge.org/packages/](https://conda-forge.org/packages/)
```
__glibc >=2.17,<3.0.a0
_openmp_mutex >=4.5
cuda-version >=12.0,<13
cuda-version >=12.5,<13
custatevec >=1.6.0,<2.0a0
cutensornet >=2.5.0,<3.0a0 nompi_*
libblas >=3.9.0,<4.0a0
libgcc
libgcc-ng >=13
liblapack >=3.9.0,<4.0a0
libstdcxx
libstdcxx-ng >=13
numpy >=1.16.3
psutil >=5
python >=3.12,<3.13.0a0
python_abi 3.12.* *_cp312
qiskit >=0.45.2
scipy >=1.0
```
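The pin list above can be reproduced locally with mamba's repoquery, which is handy for checking other builds; a sketch:
```
# Show the run dependencies of the CUDA build of qiskit-aer
mamba repoquery depends -c conda-forge qiskit-aer=0.15.1=cuda125_py312hde3b351_0
```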
## python virtualenv
> pip install <package_name>==<version>
```
mamba create -n cuquantum-qiskit-pip -c conda-forge python=3.12 mpi4py openmpi=4.*.*=external_*
mamba activate cuquantum-qiskit-pip
pip install cuquantum-cu12 cuquantum-python-cu12 jupyter matplotlib pandas scikit-learn
pip install qiskit qiskit-aer-gpu qiskit-ibm-runtime qiskit[visualization] qiskit-algorithms qiskit-machine-learning nvidia-cusparse-cu12==12.3.1.170 nvidia-nvjitlink-cu12==12.5.82
```
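A quick check that the GPU build of qiskit-aer actually sees the device:
```
# 'GPU' should appear in the tuple if qiskit-aer-gpu loaded its CUDA libraries
python -c "from qiskit_aer import AerSimulator; print(AerSimulator().available_devices())"
```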
The pins on nvidia-cusparse-cu12 and nvidia-nvjitlink-cu12 are there because, without them, importing qiskit_aer fails:
```
Traceback (most recent call last):
File "/home/p00acy00/test/cuTN-QSVM_Li/benchmark/banchmark_qsvm_tnsm.py", line 17, in <module>
from qiskit_aer import Aer
File "/home/p00acy00/apps/miniforge3/envs/cuquantum-qiskit/lib/python3.12/site-packages/qiskit_aer/__init__.py", line 69, in <module>
from qiskit_aer.aerprovider import AerProvider
File "/home/p00acy00/apps/miniforge3/envs/cuquantum-qiskit/lib/python3.12/site-packages/qiskit_aer/aerprovider.py", line 20, in <module>
from .backends.aer_simulator import AerSimulator
File "/home/p00acy00/apps/miniforge3/envs/cuquantum-qiskit/lib/python3.12/site-packages/qiskit_aer/backends/__init__.py", line 17, in <module>
from .aer_simulator import AerSimulator
File "/home/p00acy00/apps/miniforge3/envs/cuquantum-qiskit/lib/python3.12/site-packages/qiskit_aer/backends/aer_simulator.py", line 24, in <module>
from .aerbackend import AerBackend, AerError
File "/home/p00acy00/apps/miniforge3/envs/cuquantum-qiskit/lib/python3.12/site-packages/qiskit_aer/backends/aerbackend.py", line 33, in <module>
from ..noise.noise_model import NoiseModel, QuantumErrorLocation
File "/home/p00acy00/apps/miniforge3/envs/cuquantum-qiskit/lib/python3.12/site-packages/qiskit_aer/noise/__init__.py", line 241, in <module>
from .noise_model import NoiseModel
File "/home/p00acy00/apps/miniforge3/envs/cuquantum-qiskit/lib/python3.12/site-packages/qiskit_aer/noise/noise_model.py", line 38, in <module>
from ..backends.backend_utils import BASIS_GATES
File "/home/p00acy00/apps/miniforge3/envs/cuquantum-qiskit/lib/python3.12/site-packages/qiskit_aer/backends/backend_utils.py", line 30, in <module>
from .controller_wrappers import aer_initialize_libraries
ImportError: /home/p00acy00/apps/miniforge3/envs/cuquantum-qiskit/lib/python3.12/site-packages/qiskit_aer/backends/../../nvidia/cusparse/lib/libcusparse.so.12: undefined symbol: __nvJitLinkGetErrorLog_12_6, version libnvJitLink.so.12
```
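The undefined `__nvJitLinkGetErrorLog_12_6` symbol means the installed cusparse wheel expects a newer `libnvJitLink` than the one being loaded; the two wheel versions can be inspected directly:
```
# The cusparse and nvjitlink wheel versions must be compatible;
# the explicit pins in the install command above avoid the conflict
pip list | grep -E 'nvidia-(cusparse|nvjitlink)'
```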
## submit job
> salloc --partition=gp1d --account=GOV113080 --nodes=1 --ntasks-per-node=1 --cpus-per-task=1 --gres=gpu:1
single-gpu.sh
```
#!/bin/bash
#SBATCH --job-name test # Job name
#SBATCH --output %x-%j.out # Name of stdout output file (%x expands to jobname, %j expands to jobId)
#SBATCH --nodes=1 #Controls the number of nodes allocated to the job
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1 #Controls the number of CPUs allocated per task
#SBATCH --gres=gpu:1
#SBATCH --partition gp1d
#SBATCH --account GOV113080
#SBATCH --time 06:00:00
module purge
module use /home/p00acy00/modulefiles/
module load nvhpc-hpcx-cuda12/24.7
echo "Running on hosts: $SLURM_NODELIST"
echo "Running on $SLURM_NNODES nodes."
echo "Running $SLURM_NTASKS tasks."
echo "$SLURM_MPI_TYPE"
SUBMIT_FILE=`scontrol show job $SLURM_JOB_ID | grep "Command=" | awk 'BEGIN {FS="="}; {print $2}'`
echo "Job script is as follow"
cat $SUBMIT_FILE
echo "End of job script."
echo "Your job starts at `date`"
~/apps/miniforge3/envs/cuquantum-qiskit/bin/python benchmark/banchmark_qsvm_tnsm.py
echo "Your job completed at `date`"
```
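Submission and follow-up, given the `%x-%j.out` output pattern above:
```
sbatch single-gpu.sh
squeue -u $USER            # watch the job state
tail -f test-<jobid>.out   # stdout lands in test-<jobid>.out
```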
## openmpi
HPC-X MPI (bundled with the NVHPC SDK):
```
Configure command line: 'CC=gcc' 'CXX=g++' 'FC=nvfortran'
'LDFLAGS=-Wl,-rpath-link=/proj/nv/libraries/Linux_x86_64/24.7/hpcx-12/258552-rel-1/comm_libs/12.4/hpcx/hpcx-2.19/ucx/lib
-Wl,-rpath-link=/proj/nv/libraries/Linux_x86_64/24.7/hpcx-12/258552-rel-1/comm_libs/12.4/hpcx/hpcx-2.19/hcoll/lib'
'--with-platform=../contrib/platform/nvhpc/optimized'
'--enable-mpi1-compatibility'
'--with-libevent=internal' '--without-xpmem'
'--without-cray-xpmem' '--with-slurm'
'--with-cuda=/proj/cuda/12.4/Linux_x86_64'
'--with-hcoll=/proj/nv/libraries/Linux_x86_64/24.7/hpcx-12/258552-rel-1/comm_libs/12.4/hpcx/hpcx-2.19/hcoll'
'--with-ucc=/proj/nv/libraries/Linux_x86_64/24.7/hpcx-12/258552-rel-1/comm_libs/12.4/hpcx/hpcx-2.19/ucc'
'--with-ucx=/proj/nv/libraries/Linux_x86_64/24.7/hpcx-12/258552-rel-1/comm_libs/12.4/hpcx/hpcx-2.19/ucx'
'--prefix=/proj/nv/libraries/Linux_x86_64/24.7/hpcx-12/258552-rel-1/comm_libs/12.4/hpcx/hpcx-2.19/ompi'
```
System Open MPI 4.1.6:
```
Configure command line: '--prefix=/hpc_sys/HPC_software/LMOD/nvidia/packages/cuda-12.2/openmpi-4.1.6'
'--with-ucx=/hpc_sys/HPC_software/LMOD/nvidia/packages/cuda-12.2/ucx-1.16.0'
'--with-cuda=/hpc_sys/HPC_software/LMOD/nvidia/packages/cuda-12.2'
'--disable-getpwuid' '--disable-static'
'--enable-shared' '--enable-mpi-fortran'
'--enable-mpi1-compatibility' '--without-hcoll'
'--with-slurm' '--with-zlib'
'--with-platform=contrib/platform/mellanox/optimized'
'--with-pmix=internal' '--with-hwloc=internal'
'--with-libevent=internal'
```
Self-built Open MPI 4.1.6 against the NVHPC HPC-X components:
```
./configure CC=gcc CXX=g++ FC=gfortran \
    LDFLAGS="-Wl,-rpath-link=/work/HPC_software/LMOD/nvidia/packages/hpc_sdk-24.7/Linux_x86_64/24.7/comm_libs/12.5/hpcx/hpcx-2.19/ucx/lib -Wl,-rpath-link=/work/HPC_software/LMOD/nvidia/packages/hpc_sdk-24.7/Linux_x86_64/24.7/comm_libs/12.5/hpcx/hpcx-2.19/hcoll/lib" \
    --with-platform=contrib/platform/mellanox/optimized \
    --enable-mpi-cxx --enable-mpi1-compatibility \
    --without-xpmem --without-cray-xpmem \
    --with-pmix=internal --with-hwloc=internal --with-libevent=internal \
    --with-slurm --with-zlib \
    --with-cuda=/work/HPC_software/LMOD/nvidia/packages/hpc_sdk-24.7/Linux_x86_64/24.7/cuda/12.5 \
    --with-hcoll=/work/HPC_software/LMOD/nvidia/packages/hpc_sdk-24.7/Linux_x86_64/24.7/comm_libs/12.5/hpcx/hpcx-2.19/hcoll \
    --with-ucc=/work/HPC_software/LMOD/nvidia/packages/hpc_sdk-24.7/Linux_x86_64/24.7/comm_libs/12.5/hpcx/hpcx-2.19/ucc \
    --with-ucx=/work/HPC_software/LMOD/nvidia/packages/hpc_sdk-24.7/Linux_x86_64/24.7/comm_libs/12.5/hpcx/hpcx-2.19/ucx \
    --prefix=/home/p00acy00/libraries/openmpi-4.1.6
```
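The usual autotools build and install then follows; a sketch:
```
make -j$(nproc) all
make install
# Point the shell at the new build
export PATH=/home/p00acy00/libraries/openmpi-4.1.6/bin:$PATH
export LD_LIBRARY_PATH=/home/p00acy00/libraries/openmpi-4.1.6/lib:$LD_LIBRARY_PATH
```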
```