#!/bin/bash
# One-shot installer for a local AI stack on Ubuntu 24.04:
# GPU driver + CUDA/cuDNN or ROCm, TensorFlow, Ollama, OpenWebUI,
# Apache Tika and Stable Diffusion WebUI.
# Must be run as root (checked in main); may reboot the machine after a
# driver install, leaving FLAG_FILE behind so the operator knows to re-run.
set -e
# Force a stable UTF-8 C locale so tool output parsed below is predictable.
export LC_ALL=C.UTF-8
export LANG=C.UTF-8
# Marker dropped just before any reboot triggered by a driver install.
FLAG_FILE="/tmp/ai_install_after_reboot.flag"
# Apache Tika release built from source in install_tika.
TIKA_VERSION="2.9.2"
# Default JDK path recorded in /etc/environment; note install_tika overrides
# JAVA_HOME to Java 11 for the Tika build and service.
JAVA_HOME="/usr/lib/jvm/java-21-openjdk-amd64"
# Service listen ports.
OPENWEBUI_PORT="8080"
STABLE_DIFFUSION_PORT="7860"
TIKA_PORT="9998"
# tensorflow-rocm version installed on AMD hosts.
ROCM_TF_VERSION="2.19.0"
# Distro/arch tokens matching the NVIDIA repository URL path.
# NOTE(review): os/arch are not referenced later in this chunk — confirm
# whether anything else consumes them before removing.
os="ubuntu2404"
arch="x86_64"
# Verify that a supported GPU is present and that a proprietary driver is
# active. When only the open-source driver (nouveau/radeon) is loaded, the
# vendor driver is installed, FLAG_FILE is dropped and the machine reboots
# so the operator can re-run this script. Aborts when no GPU is found.
check_and_install_gpu_drivers() {
  echo "檢查 GPU 硬體..."
  # Bail out early when neither an NVIDIA nor an AMD adapter is on the PCI bus.
  if ! lspci | grep -iE 'nvidia|amd' >/dev/null 2>&1; then
    echo "[錯誤] 未偵測到 NVIDIA 或 AMD 顯示卡,安裝中止。"
    exit 1
  fi
  # Kernel driver currently bound to the display controller (may be multiline
  # on multi-GPU hosts; grep below matches any line).
  DRIVER=$(sudo lshw -C display 2>/dev/null | grep 'configuration:' | grep -o 'driver=[^ ]*' | cut -d'=' -f2)
  if echo "$DRIVER" | grep -q '^nvidia$'; then
    echo "已偵測到 NVIDIA 專有驅動,跳過驅動安裝。"
    return 0
  elif echo "$DRIVER" | grep -q '^amdgpu$'; then
    echo "已偵測到 AMD 專有驅動,跳過驅動安裝。"
    return 0
  elif echo "$DRIVER" | grep -q '^nouveau$'; then
    echo "偵測到 nouveau 開源驅動,開始安裝 NVIDIA 專有驅動..."
    sudo apt update
    # Remove any half-installed NVIDIA packages before letting
    # ubuntu-drivers pick the recommended driver.
    sudo apt purge -y '*nvidia*'
    sudo apt autoremove --purge -y
    sudo apt install -y ubuntu-drivers-common
    sudo ubuntu-drivers install
    echo "NVIDIA 驅動安裝完成,系統將重開機..."
    touch "$FLAG_FILE"
    sleep 3
    sudo reboot
    exit 0
  elif echo "$DRIVER" | grep -q '^radeon$'; then
    echo "偵測到 radeon 開源驅動,開始安裝 AMD 專有驅動(amdgpu)..."
    sudo apt update
    sudo apt install -y firmware-amd-graphics
    # Was a bare redirection; write through sudo tee for consistency with the
    # rest of the script (and so it still works if the shell is not root).
    echo "amdgpu" | sudo tee /etc/modules-load.d/amdgpu.conf >/dev/null
    echo "AMD 驅動安裝完成,系統將重開機..."
    touch "$FLAG_FILE"
    sleep 3
    sudo reboot
    exit 0
  else
    echo "[錯誤] 未偵測到已知驅動(nvidia、amdgpu、nouveau、radeon),請檢查硬體或手動安裝。"
    exit 1
  fi
}
# Install the GPU compute stack matching the active driver:
#   nvidia  -> CUDA toolkit + cuDNN 9 (verified by building the mnistCUDNN sample)
#   amdgpu  -> ROCm 6.1 (followed by a reboot)
install_cuda_cudnn_rocm() {
  DRIVER=$(sudo lshw -C display 2>/dev/null | grep 'configuration:' | grep -o 'driver=[^ ]*' | cut -d'=' -f2)
  if echo "$DRIVER" | grep -q '^nvidia$' && lspci | grep -qi nvidia; then
    if ! command -v nvcc >/dev/null 2>&1; then
      echo "未偵測到 CUDA(nvcc),開始安裝..."
      sudo apt update
      sudo apt install -y nvidia-cuda-toolkit
      if command -v nvcc >/dev/null 2>&1; then
        echo "CUDA 已安裝,版本:$(nvcc --version | grep release)"
      else
        echo "[錯誤] CUDA 安裝後未檢測到 nvcc,請檢查 nvidia-cuda-toolkit。"
        exit 1
      fi
    else
      echo "CUDA 已安裝,版本:$(nvcc --version | grep release)"
    fi
    if ! dpkg -l | grep -q cudnn; then
      echo "未偵測到 cuDNN,開始安裝..."
      # apt-key was removed in Ubuntu 24.04, so the old "apt-key adv" call
      # could never work there. Import the NVIDIA signing key as a dearmored
      # keyring file and reference it via signed-by instead.
      sudo mkdir -p /etc/apt/keyrings
      wget -qO- https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/3bf863cc.pub | sudo gpg --dearmor --yes -o /etc/apt/keyrings/cuda.gpg
      # Add the NVIDIA repository, pinned to the keyring above.
      echo "deb [signed-by=/etc/apt/keyrings/cuda.gpg] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/ /" | sudo tee /etc/apt/sources.list.d/cuda.list
      sudo apt-get update
      # Restructured as an if: the old "[ $? -eq 0 ]" test was dead code under
      # set -e (a failed install would have aborted the script first).
      if sudo apt-get -y install cudnn9-cuda-12; then
        echo "cuDNN 安裝完成,進行驗證..."
        # Smoke-test cuDNN by compiling and running the bundled MNIST sample.
        sudo apt-get -y install libcudnn9-samples
        cp -r /usr/src/cudnn_samples_v9/ "$HOME"
        cd "$HOME/cudnn_samples_v9/mnistCUDNN"
        make clean && make
        ./mnistCUDNN && echo "cuDNN 測試通過!" || echo "[警告] cuDNN 範例執行失敗,請檢查日誌或 GPU 環境。"
        cd - > /dev/null
      else
        echo "[錯誤] cuDNN 安裝失敗"
        exit 1
      fi
    else
      echo "cuDNN 已安裝"
    fi
  elif echo "$DRIVER" | grep -q '^amdgpu$' && lspci | grep -qi amd; then
    if [ ! -d /opt/rocm ]; then
      echo "未偵測到 ROCm,開始安裝..."
      sudo apt update
      sudo apt install -y wget gnupg2
      wget -q https://repo.radeon.com/rocm/rocm.gpg.key -O /tmp/rocm.gpg.key
      sudo mkdir -p /etc/apt/keyrings
      sudo mv /tmp/rocm.gpg.key /etc/apt/keyrings/
      echo 'deb [signed-by=/etc/apt/keyrings/rocm.gpg.key] https://repo.radeon.com/rocm/apt/6.1/ ubuntu main' | sudo tee /etc/apt/sources.list.d/rocm.list
      sudo apt update
      # Same dead-code fix as above: test the install directly.
      if sudo apt install -y rocm-dkms; then
        echo "ROCm 安裝完成,系統將重開機..."
        touch "$FLAG_FILE"
        sleep 3
        sudo reboot
        exit 0
      else
        echo "[錯誤] ROCm 安裝失敗,請檢查網絡或套件來源。"
        exit 1
      fi
    else
      echo "ROCm 已安裝"
    fi
  else
    echo "[錯誤] 未檢測到有效 GPU 驅動(nvidia 或 amdgpu),無法安裝 CUDA/cuDNN 或 ROCm。"
    exit 1
  fi
}
# Install base tooling, Python 3.11 (TensorFlow/OpenWebUI) and Python 3.10
# (Stable Diffusion) from the deadsnakes PPA, plus OpenJDK 21 and Maven for
# the Tika build. Sets the globals PYTHON_VERSION / PYTHON_CMD consumed by
# the later install steps; exits non-zero if either interpreter is missing.
check_environment() {
  echo "檢查環境..."
  sudo apt update
  sudo apt install -y curl git wget lsb-release software-properties-common unzip
  UBUNTU_VERSION=$(lsb_release -sr 2>/dev/null || echo "unknown")
  DISTRO=$(lsb_release -si 2>/dev/null || echo "unknown")
  echo "檢測到系統:${DISTRO} ${UBUNTU_VERSION}"
  SYSTEM_PYTHON_VERSION=$(python3 --version 2>&1 | grep -oP '\d+\.\d+' | head -1)
  echo "系統預設 Python 版本:${SYSTEM_PYTHON_VERSION}"
  echo "添加 deadsnakes/ppa 儲存庫並安裝 Python 3.11 和 Python 3.10..."
  sudo add-apt-repository -y ppa:deadsnakes/ppa
  sudo apt update
  sudo apt install -y python3.11 python3.11-venv python3.11-dev python3.10 python3.10-venv python3.10-dev
  if command -v python3.11 >/dev/null 2>&1; then
    PYTHON_VERSION="3.11"
    PYTHON_CMD="python3.11"
    echo "找到 Python 3.11,將用於 TensorFlow 和 OpenWebUI"
    # deadsnakes interpreters may ship without pip; bootstrap it on demand.
    if ! $PYTHON_CMD -m pip --version >/dev/null 2>&1; then
      curl -sS https://bootstrap.pypa.io/get-pip.py | $PYTHON_CMD
      $PYTHON_CMD -m pip install --upgrade pip
    fi
  else
    echo "錯誤:Python 3.11 安裝失敗,無法繼續"
    echo "請手動執行以下命令安裝 Python 3.11:"
    echo " sudo apt update"
    echo " sudo add-apt-repository ppa:deadsnakes/ppa"
    echo " sudo apt update"
    echo " sudo apt install python3.11 python3.11-venv python3.11-dev"
    exit 1
  fi
  if command -v python3.10 >/dev/null 2>&1; then
    echo "找到 Python 3.10,將用於 Stable Diffusion"
    if ! python3.10 -m pip --version >/dev/null 2>&1; then
      curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
      python3.10 -m pip install --upgrade pip
    fi
  else
    echo "錯誤:Python 3.10 安裝失敗,無法繼續"
    echo "請手動執行以下命令安裝 Python 3.10:"
    echo " sudo apt update"
    echo " sudo add-apt-repository ppa:deadsnakes/ppa"
    echo " sudo apt update"
    echo " sudo apt install python3.10 python3.10-venv python3.10-dev"
    exit 1
  fi
  if ! [ -d "$JAVA_HOME" ]; then
    echo "安裝 OpenJDK 21..."
    sudo apt install -y openjdk-21-jdk
    JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")
  fi
  # Record JAVA_HOME once; the old unconditional append duplicated the line
  # every time the installer was re-run.
  grep -q '^JAVA_HOME=' /etc/environment || echo "JAVA_HOME=$JAVA_HOME" >> /etc/environment
  if ! command -v mvn &> /dev/null; then
    echo "安裝 Maven..."
    sudo apt install -y maven
  fi
  # Final sanity checks: both interpreters and their pips must be callable.
  $PYTHON_CMD --version || { echo "錯誤:Python 3.11 安裝失敗,退出"; exit 1; }
  $PYTHON_CMD -m pip --version || { echo "錯誤:pip for Python 3.11 安裝失敗,退出"; exit 1; }
  python3.10 --version || { echo "錯誤:Python 3.10 安裝失敗,退出"; exit 1; }
  python3.10 -m pip --version || { echo "錯誤:pip for Python 3.10 安裝失敗,退出"; exit 1; }
  echo "使用的 Python 命令:$PYTHON_CMD (3.11 for TensorFlow/OpenWebUI), python3.10 (for Stable Diffusion)"
}
# Create a Python 3.11 venv at $WORKDIR/tf_env, install the TensorFlow build
# matching the GPU vendor (NVIDIA / ROCm / CPU), verify the import, and
# pre-download the VGG16 model archive used by the Tika DL module.
install_tensorflow() {
  echo "安裝 TensorFlow(使用 $PYTHON_CMD)..."
  # Quiet TF logging and disable oneDNN notices for the verification step.
  export TF_CPP_MIN_LOG_LEVEL=2
  export TF_ENABLE_ONEDNN_OPTS=0
  $PYTHON_CMD -m venv "$WORKDIR/tf_env"
  if ! source "$WORKDIR/tf_env/bin/activate"; then
    echo "錯誤:無法激活 TensorFlow 虛擬環境,請檢查 $WORKDIR/tf_env"
    exit 1
  fi
  pip install --upgrade --root-user-action=ignore pip
  pip install --upgrade --root-user-action=ignore setuptools wheel
  if lspci | grep -qi nvidia; then
    echo "安裝 NVIDIA GPU 版本的 TensorFlow..."
    pip install --root-user-action=ignore tensorflow==2.19.0
  elif lspci | grep -qi "amd" && [ -d "/opt/rocm" ]; then
    echo "安裝 AMD ROCm 版本的 TensorFlow..."
    pip install --root-user-action=ignore tensorflow-rocm==$ROCM_TF_VERSION
  else
    echo "安裝 CPU 版本的 TensorFlow..."
    pip install --root-user-action=ignore tensorflow==2.19.0
  fi
  echo "驗證 TensorFlow 安裝..."
  python -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'; import tensorflow as tf; print('TensorFlow Version:', tf.__version__); print('GPU Devices:', tf.config.list_physical_devices('GPU'))" || {
    echo "錯誤:TensorFlow 驗證失敗"
    deactivate || echo "警告:無法解除虛擬環境,繼續"
    exit 1
  }
  echo "下載 VGG16 模型..."
  wget --progress=bar:force:noscroll -q --show-progress https://dl4jdata.blob.core.windows.net/models/vgg16_dl4j_inference.zip -O "$WORKDIR/vgg16_dl4j_inference.zip" || {
    echo "錯誤:VGG16 模型下載失敗"
    deactivate || echo "警告:無法解除虛擬環境,繼續"
    exit 1
  }
  echo "VGG16 模型下載完成"
  export LD_LIBRARY_PATH=/usr/local/lib:/usr/lib:/usr/lib64:$LD_LIBRARY_PATH
  # Bug fix: /etc/environment is parsed by pam_env as plain KEY=VALUE pairs,
  # so the previous "export ..." appends there were never honored (and piled
  # up duplicates on every run). Persist the settings in a profile.d script,
  # which IS a shell fragment, rewritten idempotently on each run.
  cat << 'EOF' > /etc/profile.d/tensorflow-env.sh
export LD_LIBRARY_PATH=/usr/local/lib:/usr/lib:/usr/lib64:$LD_LIBRARY_PATH
export TF_CPP_MIN_LOG_LEVEL=2
export TF_ENABLE_ONEDNN_OPTS=0
EOF
  deactivate || echo "警告:無法解除虛擬環境,繼續"
  echo "TensorFlow 安裝完成"
}
# Install Ollama, pull a user-selected model and register a systemd service
# listening on 0.0.0.0:11434. On NVIDIA hosts the user chooses how many GPUs
# to expose via CUDA_VISIBLE_DEVICES.
install_ollama() {
  echo "安裝 Ollama..."
  curl -fsSL https://ollama.com/install.sh | sh
  export OLLAMA_MODELS="$OLLAMA_MODELS_DIR"
  # /etc/environment takes plain KEY=VALUE (pam_env ignores "export" lines),
  # and the append is guarded so re-runs don't accumulate duplicates.
  grep -q '^OLLAMA_MODELS=' /etc/environment || echo "OLLAMA_MODELS=$OLLAMA_MODELS_DIR" >> /etc/environment
  echo "請輸入 Ollama 模組名稱 (預設: llama3:8b-q2_K,適合 4GB VRAM):"
  read -r OLLAMA_MODEL
  OLLAMA_MODEL="${OLLAMA_MODEL:-llama3:8b-q2_K}"
  ollama pull "$OLLAMA_MODEL"
  # Require nvidia-smi as well as lspci: the PCI device can be visible while
  # the driver userland is still missing, which broke the old GPU count.
  if lspci | grep -qi nvidia && command -v nvidia-smi >/dev/null 2>&1; then
    GPU_COUNT=$(nvidia-smi -L | wc -l)
  else
    GPU_COUNT=0
  fi
  if [ "$GPU_COUNT" -gt 0 ]; then
    echo "偵測到 $GPU_COUNT 張 NVIDIA GPU。"
    echo "請輸入要使用的 GPU 數量 (預設: 1,最大: $GPU_COUNT):"
    read -r GPU_NUM
    GPU_NUM=${GPU_NUM:-1}
    # Fall back to a single GPU on non-numeric or out-of-range input.
    if ! [[ "$GPU_NUM" =~ ^[0-9]+$ ]] || [ "$GPU_NUM" -lt 1 ] || [ "$GPU_NUM" -gt "$GPU_COUNT" ]; then
      echo "輸入無效,將僅使用 1 張 GPU。"
      GPU_NUM=1
    fi
    # Build the CUDA_VISIBLE_DEVICES list ("0" or "0,1,...").
    if [ "$GPU_NUM" -eq 1 ]; then
      CUDA_VISIBLE_DEVICES="0"
    else
      CUDA_VISIBLE_DEVICES=$(seq -s ',' 0 $((GPU_NUM-1)))
    fi
  else
    CUDA_VISIBLE_DEVICES=""
  fi
  # The CUDA_VISIBLE_DEVICES line is only emitted into the unit when set.
  cat << EOF > /etc/systemd/system/ollama.service
[Unit]
Description=Ollama Service
After=network-online.target
[Service]
ExecStart=/usr/local/bin/ollama serve
User=root
Group=root
Restart=always
RestartSec=3
Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/cuda-12.9/bin"
Environment="OLLAMA_MODELS=$OLLAMA_MODELS_DIR"
Environment="OLLAMA_HOST=0.0.0.0:11434"
Environment="OLLAMA_CUDA_ENABLED=true"
Environment="OLLAMA_NUM_PARALLEL=4"
Environment="OLLAMA_SCHED_SPREAD=true"
Environment="OLLAMA_KEEP_ALIVE=24h"
$( [ -n "$CUDA_VISIBLE_DEVICES" ] && echo "Environment=\"CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES\"")
Environment="LD_LIBRARY_PATH=/usr/local/cuda-12.9/lib64:/usr/lib:/usr/lib64"
Environment="NVIDIA_VISIBLE_DEVICES=all"
Environment="NVIDIA_DRIVER_CAPABILITIES=compute,utility"
[Install]
WantedBy=multi-user.target
EOF
  sudo systemctl daemon-reload
  sudo systemctl enable ollama.service
  sudo systemctl start ollama.service
  echo "Ollama 安裝完成"
}
# Install OpenWebUI into its own Python 3.11 venv, write its .env (image
# generation delegated to the local Stable Diffusion API) and register a
# systemd unit serving on $OPENWEBUI_PORT.
install_openwebui() {
  echo "安裝 OpenWebUI(使用 $PYTHON_CMD)..."
  $PYTHON_CMD -m venv "$WORKDIR/open-webui-venv"
  if ! source "$WORKDIR/open-webui-venv/bin/activate"; then
    echo "錯誤:無法激活 OpenWebUI 虛擬環境,請檢查 $WORKDIR/open-webui-venv"
    exit 1
  fi
  pip install --upgrade --root-user-action=ignore pip
  # Restructured as an if: the old "[ $? -ne 0 ]" check was unreachable under
  # set -e because a failed pip would have aborted the script first.
  if ! pip install --root-user-action=ignore open-webui; then
    echo "錯誤:無法安裝 open-webui,請檢查網絡或 PyPI 儲存庫"
    exit 1
  fi
  echo "請輸入 Stable Diffusion 模組名稱 (預設:runwayml/stable-diffusion-v1-5):"
  read -r SD_MODEL
  SD_MODEL="${SD_MODEL:-runwayml/stable-diffusion-v1-5}"
  mkdir -p "$WORKDIR/open-webui"
  # Point OpenWebUI's image generation at the local AUTOMATIC1111 instance.
  cat << EOF > "$WORKDIR/open-webui/.env"
ENABLE_IMAGE_GENERATION=True
IMAGE_GENERATION_ENGINE=automatic1111
AUTOMATIC1111_BASE_URL=http://localhost:$STABLE_DIFFUSION_PORT
IMAGE_GENERATION_MODEL=$SD_MODEL
EOF
  cat << EOF > /etc/systemd/system/openwebui.service
[Unit]
Description=OpenWebUI Service
After=network.target
[Service]
Type=simple
User=root
ExecStart=$WORKDIR/open-webui-venv/bin/open-webui serve --port $OPENWEBUI_PORT
WorkingDirectory=$WORKDIR/open-webui
Restart=always
RestartSec=10
Environment="PATH=$WORKDIR/open-webui-venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
[Install]
WantedBy=multi-user.target
EOF
  sudo systemctl daemon-reload
  sudo systemctl enable openwebui.service
  sudo systemctl start openwebui.service
  deactivate || echo "警告:無法解除虛擬環境,繼續"
  echo "OpenWebUI 安裝完成"
}
# Build Apache Tika $TIKA_VERSION from source with Maven under Java 11,
# injecting TensorFlow/DL4J dependencies into the tika-dl module so the
# VGG16 model downloaded earlier can be used for image recognition.
install_tika() {
  echo "安裝 Apache Tika..."
  # Tika is built and served with Java 11; install it when missing.
  TIKA_JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64"
  if [ ! -d "$TIKA_JAVA_HOME" ]; then
    echo "未偵測到 Java 11,正在安裝..."
    sudo apt update
    sudo apt install -y openjdk-11-jdk
  fi
  # Point the build environment at Java 11 and give Maven more heap.
  export JAVA_HOME="$TIKA_JAVA_HOME"
  export PATH="$JAVA_HOME/bin:$PATH"
  export MAVEN_OPTS="-Xmx2048m"
  if [ ! -d "$JAVA_HOME" ]; then
    echo "錯誤:JAVA_HOME ($JAVA_HOME) 不存在,請確認 Java 11 是否安裝正確"
    exit 1
  fi
  if [ -d "$WORKDIR/tika-$TIKA_VERSION" ]; then
    echo "檢測到已存在的 Tika 目錄,正在更新..."
    cd "$WORKDIR/tika-$TIKA_VERSION"
  else
    echo "下載 Tika 源碼..."
    # Download with retries; each attempt must pass a size check and a zip
    # integrity check before it is accepted.
    local max_retries=3
    local retry_count=0
    local download_success=false
    while [ "$retry_count" -lt "$max_retries" ] && [ "$download_success" = false ]; do
      rm -f "$WORKDIR/tika-$TIKA_VERSION-src.zip"
      if curl -L --progress-bar -o "$WORKDIR/tika-$TIKA_VERSION-src.zip" "https://archive.apache.org/dist/tika/$TIKA_VERSION/tika-$TIKA_VERSION-src.zip"; then
        # Declaration split from assignment (so stat's status isn't masked by
        # local), defaulting to 0 so an empty value can't break the numeric
        # comparison below.
        local file_size
        file_size=$(stat -c%s "$WORKDIR/tika-$TIKA_VERSION-src.zip" 2>/dev/null || echo 0)
        if [ "$file_size" -gt 1000000 ]; then
          if unzip -t "$WORKDIR/tika-$TIKA_VERSION-src.zip" > /dev/null 2>&1; then
            download_success=true
            echo "Tika 源碼下載成功並驗證完成"
          else
            echo "文件完整性檢查失敗,重試中... (嘗試 $((retry_count + 1))/$max_retries)"
          fi
        else
          echo "下載的文件太小,可能不完整,重試中... (嘗試 $((retry_count + 1))/$max_retries)"
        fi
      else
        echo "下載失敗,重試中... (嘗試 $((retry_count + 1))/$max_retries)"
      fi
      if [ "$download_success" = false ]; then
        retry_count=$((retry_count + 1))
        if [ "$retry_count" -lt "$max_retries" ]; then
          echo "等待 5 秒後重試..."
          sleep 5
        fi
      fi
    done
    if [ "$download_success" = false ]; then
      echo "錯誤:在 $max_retries 次嘗試後仍無法下載有效的 Tika 源碼"
      exit 1
    fi
    echo "解壓 Tika 源碼..."
    unzip -q "$WORKDIR/tika-$TIKA_VERSION-src.zip" -d "$WORKDIR" || { echo "錯誤:解壓 Tika 源碼失敗"; exit 1; }
    cd "$WORKDIR/tika-$TIKA_VERSION"
  fi
  echo "安裝必要工具..."
  sudo apt install -y bc || { echo "錯誤:安裝 bc 失敗"; exit 1; }
  if [ -f "tika-dl/pom.xml" ]; then
    echo "更新 tika-dl/pom.xml..."
    # Keep a backup, then insert TensorFlow and DL4J dependencies right
    # before the closing </dependencies> tag.
    cp tika-dl/pom.xml tika-dl/pom.xml.bak
    sed -i '/<\/dependencies>/i \
<dependency>\
<groupId>org.tensorflow<\/groupId>\
<artifactId>tensorflow-core-platform<\/artifactId>\
<version>0.8.0<\/version>\
<\/dependency>\
<dependency>\
<groupId>org.deeplearning4j<\/groupId>\
<artifactId>deeplearning4j-zoo<\/artifactId>\
<version>1.0.0-beta7<\/version>\
<\/dependency>' tika-dl/pom.xml
    echo "已更新 tika-dl/pom.xml 文件"
  else
    echo "警告:找不到 tika-dl/pom.xml 文件,跳過更新"
  fi
  echo "編譯 Tika..."
  mvn clean install -DskipTests -Dforbiddenapis.skip=true -Dossindex.skip=true
  echo "Tika 安裝完成"
}
# Register and start the systemd unit that serves the freshly built Tika
# server on $TIKA_PORT, pinned to the Java 11 runtime.
configure_tika_service() {
  echo "配置 Tika 服務..."
  # Tika runs on Java 11 regardless of the system-wide JAVA_HOME.
  TIKA_JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64"
  local unit_file="/etc/systemd/system/tika.service"
  local target_dir="$WORKDIR/tika-$TIKA_VERSION/tika-server/tika-server-standard/target"
  sudo tee "$unit_file" > /dev/null << EOF
[Unit]
Description=Apache Tika Server
After=network.target
[Service]
Type=simple
User=root
ExecStart=$TIKA_JAVA_HOME/bin/java -jar $target_dir/tika-server-standard-$TIKA_VERSION.jar --host 0.0.0.0 --port $TIKA_PORT
Restart=always
RestartSec=10
Environment="JAVA_HOME=$TIKA_JAVA_HOME"
WorkingDirectory=$target_dir
[Install]
WantedBy=multi-user.target
EOF
  sudo systemctl daemon-reload
  sudo systemctl enable tika.service
  sudo systemctl start tika.service
}
# Install AUTOMATIC1111 Stable Diffusion WebUI under a dedicated non-root
# user (sd-user) with a Python 3.10 venv, download the v1.5 checkpoint,
# install a GPU-appropriate PyTorch/xformers build and register a systemd
# service logging to /var/log/stable-diffusion-webui.log.
# NOTE(review): this function mixes root-shell venv activation with
# "sudo -u sd-user pip" calls; sudo typically resets PATH (secure_path), so
# some pip invocations may hit the system interpreter instead of the venv —
# confirm on a target host before relying on this.
install_stable_diffusion() {
echo "安裝 Stable Diffusion WebUI(使用 Python 3.10)..."
# Root is required to write the systemd unit and manage sd-user.
if [ "$EUID" -ne 0 ]; then
echo "錯誤:請以 root 或 sudo 執行腳本以創建 systemd 服務"
exit 1
fi
if [ -z "$WORKDIR" ]; then
echo "錯誤:WORKDIR 未定義,請設置 WORKDIR 環境變數(例如:export WORKDIR=/mnt/model1)"
exit 1
fi
# WebUI is pinned to Python 3.10; refuse to continue without it.
if command -v python3.10 >/dev/null 2>&1; then
SD_PYTHON="python3.10"
echo "使用 Python 3.10 進行 Stable Diffusion 安裝"
else
echo "錯誤:Python 3.10 未安裝或不可用,Stable Diffusion WebUI 要求 Python 3.10"
echo "請執行以下命令安裝 Python 3.10:"
echo " sudo apt update"
echo " sudo add-apt-repository ppa:deadsnakes/ppa"
echo " sudo apt update"
echo " sudo apt install python3.10 python3.10-venv python3.10-dev"
exit 1
fi
# webui.sh refuses to run as root, so everything runs as sd-user.
if ! id "sd-user" >/dev/null 2>&1; then
echo "創建非 root 使用者 sd-user..."
sudo useradd -m -s /bin/bash sd-user
sudo usermod -aG sudo sd-user
fi
echo "確保工作目錄權限正確..."
mkdir -p "$WORKDIR"
sudo chown sd-user:sd-user "$WORKDIR"
sudo chmod 775 "$WORKDIR"
# Fresh clone on every run: any previous checkout is discarded.
echo "克隆 Stable Diffusion WebUI 倉庫..."
rm -rf "$WORKDIR/stable-diffusion-webui"
git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui.git "$WORKDIR/stable-diffusion-webui"
if [ $? -ne 0 ]; then
echo "錯誤:無法克隆 Stable Diffusion WebUI 倉庫,請檢查網絡或 GitHub 可用性"
exit 1
fi
sudo chown -R sd-user:sd-user "$WORKDIR/stable-diffusion-webui"
sudo chmod -R 775 "$WORKDIR/stable-diffusion-webui"
echo "創建 Python 3.10 虛擬環境..."
rm -rf "$WORKDIR/stable-diffusion-webui/venv"
sudo -u sd-user $SD_PYTHON -m venv "$WORKDIR/stable-diffusion-webui/venv"
# The venv is activated in THIS (root) shell even though it was created for
# sd-user; subsequent "sudo -u sd-user" calls do not inherit the activation.
if ! source "$WORKDIR/stable-diffusion-webui/venv/bin/activate"; then
echo "錯誤:無法激活 Stable Diffusion 虛擬環境,請檢查 $WORKDIR/stable-diffusion-webui/venv"
exit 1
fi
echo "更新 pip 並修復警告..."
# NOTE(review): this pipes get-pip.py into the bare python3.10 binary, not
# the venv interpreter — it likely bootstraps pip for sd-user's system
# Python 3.10 rather than the venv; verify which interpreter is intended.
sudo -u sd-user curl -sS https://bootstrap.pypa.io/get-pip.py | sudo -u sd-user $SD_PYTHON
sudo -u sd-user pip install --no-cache-dir --upgrade --force-reinstall pip setuptools wheel
sudo -u sd-user pip show pip setuptools wheel
echo "預先安裝 clip 和 huggingface_hub 套件以避免安裝錯誤..."
sudo -u sd-user pip install --no-cache-dir --root-user-action=ignore clip huggingface_hub
if [ $? -ne 0 ]; then
echo "警告:clip 或 huggingface_hub 套件安裝失敗,嘗試從 GitHub 主分支安裝 clip..."
sudo -u sd-user pip install --no-cache-dir --root-user-action=ignore git+https://github.com/openai/CLIP.git@main
sudo -u sd-user pip install --no-cache-dir --root-user-action=ignore huggingface_hub
if [ $? -ne 0 ]; then
echo "錯誤:無法安裝 clip 或 huggingface_hub 套件,請檢查網絡或 PyPI/GitHub 可用性"
exit 1
fi
fi
# Let webui.sh resolve its own dependencies, capped at 30 minutes; --exit
# makes it stop after setup instead of serving.
echo "執行 webui.sh 進行依賴安裝..."
cd "$WORKDIR/stable-diffusion-webui"
timeout 1800 sudo -u sd-user bash -c "source $WORKDIR/stable-diffusion-webui/venv/bin/activate && ./webui.sh -f --skip-torch-cuda-test --no-download-sd-model --exit" || {
echo "錯誤:webui.sh 安裝過程失敗或超時(30 分鐘),請檢查終端輸出或手動執行以下命令檢查:"
echo " cd $WORKDIR/stable-diffusion-webui"
echo " source venv/bin/activate"
echo " ./webui.sh -f --skip-torch-cuda-test --no-download-sd-model --exit"
exit 1
}
if [ ! -d "$WORKDIR/stable-diffusion-webui/venv" ]; then
echo "錯誤:虛擬環境未創建,請檢查 $WORKDIR/stable-diffusion-webui/venv 是否存在"
exit 1
fi
echo "驗證虛擬環境 Python 版本..."
# NOTE(review): this queries $SD_PYTHON (the system python3.10), not the
# venv's bin/python — it validates the interpreter used to build the venv,
# not the venv itself; confirm whether that is the intent.
python_version=$(sudo -u sd-user $SD_PYTHON --version)
if [[ "$python_version" != *"3.10"* ]]; then
echo "錯誤:虛擬環境 Python 版本不是 3.10,實際為 $python_version"
exit 1
fi
echo "虛擬環境 Python 版本:$python_version"
echo "請輸入 Stable Diffusion 模組名稱 (預設:runwayml/stable-diffusion-v1-5):"
read -r SD_MODEL
SD_MODEL="${SD_MODEL:-runwayml/stable-diffusion-v1-5}"
echo "檢查 Stable Diffusion 模型是否存在..."
mkdir -p "$WORKDIR/stable-diffusion-webui/models/Stable-diffusion"
sudo chown -R sd-user:sd-user "$WORKDIR/stable-diffusion-webui/models/Stable-diffusion"
# NOTE(review): the download below is hard-coded to runwayml/... regardless
# of the $SD_MODEL the user just entered — confirm whether custom models
# were meant to be honored here.
if [ ! -f "$WORKDIR/stable-diffusion-webui/models/Stable-diffusion/v1-5-pruned-emaonly.safetensors" ]; then
echo "下載 Stable Diffusion 模型:$SD_MODEL..."
sudo -u sd-user bash -c "source $WORKDIR/stable-diffusion-webui/venv/bin/activate && pip install --no-cache-dir huggingface_hub && huggingface-cli download runwayml/stable-diffusion-v1-5 v1-5-pruned-emaonly.safetensors --local-dir $WORKDIR/stable-diffusion-webui/models/Stable-diffusion" || {
echo "錯誤:無法下載模型 $SD_MODEL,請檢查網絡或 Hugging Face 認證"
echo "請以 sd-user 身份手動執行以下命令下載模型:"
echo " sudo -u sd-user bash -c 'source $WORKDIR/stable-diffusion-webui/venv/bin/activate && pip install huggingface_hub && huggingface-cli login && huggingface-cli download runwayml/stable-diffusion-v1-5 v1-5-pruned-emaonly.safetensors --local-dir $WORKDIR/stable-diffusion-webui/models/Stable-diffusion'"
exit 1
}
else
echo "模型檔案 v1-5-pruned-emaonly.safetensors 已存在,跳過下載"
fi
echo "安裝 xformers 系統依賴..."
sudo apt install -y build-essential cmake
echo "安裝 PyTorch 和 xformers..."
# Pick the PyTorch wheel index matching the detected CUDA version; unknown
# versions fall back to the CPU build.
if lspci | grep -qi "nvidia"; then
CUDA_VERSION=$(nvcc --version | grep "release" | awk '{print $5}' | cut -d'.' -f1-2)
echo "檢測到 CUDA 版本: $CUDA_VERSION"
case $CUDA_VERSION in
"12.9" | "12.1")
TORCH_URL="https://download.pytorch.org/whl/cu121"
;;
"12.0")
TORCH_URL="https://download.pytorch.org/whl/cu120"
;;
"11.8")
TORCH_URL="https://download.pytorch.org/whl/cu118"
;;
*)
echo "未檢測到支持的 CUDA 版本,使用 CPU 版本"
sudo -u sd-user pip install --no-cache-dir --root-user-action=ignore torch torchvision torchaudio
COMMANDLINE_ARGS="-f --api --listen --lowvram"
;;
esac
if [ -n "$TORCH_URL" ]; then
echo "使用 PyTorch 下載地址: $TORCH_URL"
sudo -u sd-user pip install --no-cache-dir --root-user-action=ignore torch==2.0.1 torchvision==0.15.2 torchaudio --index-url "$TORCH_URL"
if [ $? -ne 0 ]; then
echo "錯誤:PyTorch 安裝失敗,請檢查網絡或 PyTorch 儲存庫($TORCH_URL)"
exit 1
fi
echo "安裝 xformers..."
# xformers from PyPI first, then a source build from GitHub as fallback;
# COMMANDLINE_ARGS only gains --xformers when one of them succeeds.
sudo -u sd-user pip install --no-cache-dir --root-user-action=ignore xformers==0.0.22
if [ $? -ne 0 ]; then
echo "警告:xformers 安裝失敗,嘗試從源碼編譯..."
sudo -u sd-user pip install --no-cache-dir --root-user-action=ignore git+https://github.com/facebookresearch/xformers.git@v0.0.22
if [ $? -ne 0 ]; then
echo "警告:xformers 安裝失敗,Stable Diffusion 可能無法使用記憶體優化"
COMMANDLINE_ARGS="-f --api --listen --lowvram"
else
COMMANDLINE_ARGS="--xformers -f --api --listen --lowvram"
fi
else
COMMANDLINE_ARGS="--xformers -f --api --listen --lowvram"
fi
fi
elif lspci | grep -qi "amd" && [ -d "/opt/rocm" ]; then
echo "安裝 AMD ROCm 版本的 PyTorch..."
# NOTE(review): the wheel index suffix here is derived from ROCM_TF_VERSION
# (a TensorFlow version, e.g. "2.19"), which does not look like a valid
# PyTorch ROCm index tag (those are like rocm6.1) — verify this URL.
sudo -u sd-user pip install --no-cache-dir --root-user-action=ignore torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm${ROCM_TF_VERSION%.*}
COMMANDLINE_ARGS="--use-rocm -f --api --listen --opt-sdp-attention"
else
echo "安裝 CPU 版本的 PyTorch..."
sudo -u sd-user pip install --no-cache-dir --root-user-action=ignore torch torchvision torchaudio
COMMANDLINE_ARGS="-f --api --listen --lowvram"
fi
echo "驗證 xformers 安裝..."
# Import check inside the venv; downgrade to --lowvram args when absent.
sudo -u sd-user bash -c "source $WORKDIR/stable-diffusion-webui/venv/bin/activate && python -c \"import xformers; print('xformers version:', xformers.__version__)\"" || {
echo "警告:xformers 未正確安裝,Stable Diffusion 將不使用記憶體優化"
COMMANDLINE_ARGS="-f --api --listen --lowvram"
}
# webui-user.sh is sourced by webui.sh at startup to pick up the args.
cat << EOF > "$WORKDIR/stable-diffusion-webui/webui-user.sh"
#!/bin/bash
export COMMANDLINE_ARGS="$COMMANDLINE_ARGS"
EOF
sudo chown sd-user:sd-user "$WORKDIR/stable-diffusion-webui/webui-user.sh"
sudo chmod +x "$WORKDIR/stable-diffusion-webui/webui-user.sh"
echo "檢查 /etc/systemd/system/ 寫入權限..."
if [ ! -w "/etc/systemd/system/" ]; then
echo "錯誤:/etc/systemd/system/ 目錄不可寫,請檢查檔案系統權限"
exit 1
fi
SERVICE_FILE="/etc/systemd/system/stable-diffusion-webui.service"
echo "創建 systemd 服務檔案:$SERVICE_FILE..."
sudo tee "$SERVICE_FILE" > /dev/null << EOF
[Unit]
Description=Stable Diffusion WebUI Service
After=network.target
Wants=network-online.target
[Service]
Type=simple
User=sd-user
Group=sd-user
ExecStart=/bin/bash -c "source $WORKDIR/stable-diffusion-webui/venv/bin/activate && $WORKDIR/stable-diffusion-webui/webui.sh -f"
WorkingDirectory=$WORKDIR/stable-diffusion-webui
Restart=always
RestartSec=10
Environment="PATH=$WORKDIR/stable-diffusion-webui/venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
Environment="LD_LIBRARY_PATH=/usr/local/cuda-12.9/lib64:/usr/lib:/usr/lib64"
Environment="CUDA_VISIBLE_DEVICES=0"
Environment="NVIDIA_VISIBLE_DEVICES=all"
Environment="NVIDIA_DRIVER_CAPABILITIES=compute,utility"
StandardOutput=append:/var/log/stable-diffusion-webui.log
StandardError=append:/var/log/stable-diffusion-webui.log
[Install]
WantedBy=multi-user.target
EOF
if [ ! -f "$SERVICE_FILE" ]; then
echo "錯誤:無法創建 systemd 服務檔案 $SERVICE_FILE,請檢查 /etc/systemd/system/ 寫入權限"
exit 1
fi
sudo chmod 644 "$SERVICE_FILE"
sudo chown root:root "$SERVICE_FILE"
# Pre-create the log file so the unit's append: targets are writable by sd-user.
sudo touch /var/log/stable-diffusion-webui.log
sudo chmod 664 /var/log/stable-diffusion-webui.log
sudo chown sd-user:sd-user /var/log/stable-diffusion-webui.log
echo "重新載入 systemd 配置..."
sudo systemctl daemon-reload
if [ $? -ne 0 ]; then
echo "錯誤:systemctl daemon-reload 失敗,請檢查 systemd 配置"
exit 1
fi
echo "啟用 stable-diffusion-webui.service..."
sudo systemctl enable stable-diffusion-webui.service
if [ $? -ne 0 ]; then
echo "錯誤:無法啟用 stable-diffusion-webui.service,請檢查服務檔案:"
cat "$SERVICE_FILE"
exit 1
fi
echo "啟動 stable-diffusion-webui.service..."
sudo systemctl start stable-diffusion-webui.service
if [ $? -ne 0 ]; then
echo "錯誤:無法啟動 stable-diffusion-webui.service,請檢查日誌:"
echo " sudo journalctl -u stable-diffusion-webui.service -b"
echo " cat /var/log/stable-diffusion-webui.log"
exit 1
fi
# Give the service a moment, then confirm it stayed up.
sleep 5
if sudo systemctl is-active --quiet stable-diffusion-webui.service; then
echo "Stable Diffusion WebUI 服務已成功啟動"
else
echo "錯誤:Stable Diffusion WebUI 服務未運行,請檢查日誌:"
echo " sudo journalctl -u stable-diffusion-webui.service -b"
exit 1
fi
echo "Stable Diffusion WebUI 已安裝完成。請確保模型位於 $WORKDIR/stable-diffusion-webui/models/Stable-diffusion"
echo "WebUI 可通過 http://$(hostname -I | awk '{print $1}'):7860 訪問"
deactivate || echo "警告:無法解除虛擬環境,繼續"
}
# Report the running/not-running state of every installed service and probe
# the Ollama CLI and HTTP API. Purely informational; never fails the script.
verify_installation() {
  echo "驗證安裝..."
  local entry unit label
  for entry in \
    "tika:Tika" \
    "openwebui:OpenWebUI" \
    "stable-diffusion-webui:Stable Diffusion WebUI" \
    "ollama:Ollama"; do
    unit="${entry%%:*}"
    label="${entry#*:}"
    if sudo systemctl is-active --quiet "$unit"; then
      echo "$label 服務運行中"
    else
      echo "$label 服務未運行"
    fi
  done
  ollama list || echo "Ollama 未運行"
  curl -s http://localhost:11434/api/version || echo "Ollama API 未開放"
}
# Entry point: require root, update the system, prompt for install paths,
# run every installation step in order, then persist the environment setup
# in /etc/profile.d/ai-tools.sh.
main() {
  if [ "$EUID" -ne 0 ]; then
    echo "請使用 sudo 權限運行此腳本"
    exit 1
  fi
  echo "正在更新系統套件..."
  sudo apt update
  # if/else instead of a "$?" test: under set -e a failed upgrade would have
  # aborted before the old check could ever run.
  if sudo apt upgrade -y; then
    echo "系統已更新到最新套件。"
  else
    echo "系統更新失敗,請檢查網絡或手動更新。"
    exit 1
  fi
  echo "請輸入工作目錄 (預設: /mnt/model1):"
  # "|| true" keeps set -e from killing the script when stdin hits EOF
  # (non-interactive runs); the default value then applies.
  read -r WORKDIR || true
  WORKDIR="${WORKDIR:-/mnt/model1}"
  echo "請輸入 Ollama 模組存放路徑 (預設: $WORKDIR/ollama/models):"
  read -r OLLAMA_MODELS_DIR || true
  OLLAMA_MODELS_DIR="${OLLAMA_MODELS_DIR:-$WORKDIR/ollama/models}"
  echo "開始部署:Ollama, OpenWebUI, Apache Tika, Stable Diffusion WebUI"
  echo "變數設定:WORKDIR=$WORKDIR, TIKA_VERSION=$TIKA_VERSION, JAVA_HOME=$JAVA_HOME"
  echo "端口:OpenWebUI=$OPENWEBUI_PORT, Stable Diffusion=$STABLE_DIFFUSION_PORT, Tika=$TIKA_PORT"
  echo "Ollama 模組存放路徑:$OLLAMA_MODELS_DIR"
  mkdir -p "$WORKDIR"
  mkdir -p "$OLLAMA_MODELS_DIR"
  echo "開始安裝程序..."
  echo "步驟 1: 檢測 GPU 並安裝驅動"
  check_and_install_gpu_drivers
  echo "步驟 2: 安裝 CUDA/cuDNN 或 ROCm"
  install_cuda_cudnn_rocm
  echo "步驟 3: 檢查環境"
  check_environment || { echo "錯誤:環境檢查失敗,無法繼續安裝"; exit 1; }
  echo "步驟 4: 安裝 TensorFlow"
  install_tensorflow || { echo "警告:TensorFlow 安裝過程中出現問題,但將繼續安裝其他組件"; }
  echo "步驟 5: 安裝 Ollama"
  install_ollama || { echo "警告:Ollama 安裝過程中出現問題,但將繼續安裝其他組件"; }
  echo "步驟 6: 安裝 OpenWebUI"
  install_openwebui || { echo "警告:OpenWebUI 安裝過程中出現問題,但將繼續安裝其他組件"; }
  echo "步驟 7: 安裝 Tika"
  install_tika || { echo "警告:Tika 安裝過程中出現問題,但將繼續安裝其他組件"; }
  echo "步驟 8: 配置 Tika 服務"
  configure_tika_service || { echo "警告:Tika 服務配置過程中出現問題,但將繼續完成安裝"; }
  echo "步驟 9: 安裝 Stable Diffusion"
  install_stable_diffusion || { echo "警告:Stable Diffusion 安裝過程中出現問題,但將繼續完成安裝"; }
  echo "步驟 10: 驗證安裝"
  verify_installation
  echo "所有服務已安裝完成。"
  echo "OpenWebUI 可通過 http://$(hostname -I | awk '{print $1}'):${OPENWEBUI_PORT} 訪問"
  echo "Stable Diffusion WebUI 可通過 http://$(hostname -I | awk '{print $1}'):${STABLE_DIFFUSION_PORT} 訪問"
  echo "Apache Tika 服務運行在 http://$(hostname -I | awk '{print $1}'):${TIKA_PORT}"
  echo "Ollama API 運行在 http://$(hostname -I | awk '{print $1}'):11434"
  echo "設置系統環境變數..."
  # Quoted heredoc: the $VARs below are written literally and expanded at
  # login time by each user's shell.
  cat << 'EOF' > /etc/profile.d/ai-tools.sh
#!/bin/bash
export PATH="/bin:/usr/bin:/usr/local/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"
export PATH="/usr/local/cuda-12.9/bin:$PATH"
export LD_LIBRARY_PATH="/usr/local/cuda-12.9/lib64:$LD_LIBRARY_PATH"
if [ -d "/opt/rocm" ]; then
export PATH="/opt/rocm/bin:$PATH"
export LD_LIBRARY_PATH="/opt/rocm/lib:$LD_LIBRARY_PATH"
fi
export JAVA_HOME="/usr/lib/jvm/java-21-openjdk-amd64"
export PATH="$JAVA_HOME/bin:$PATH"
export PATH="/usr/local/bin:$PATH"
export WORKDIR="${WORKDIR:-/mnt/model1}"
export LC_ALL=C.UTF-8
export LANG=C.UTF-8
EOF
  chmod +x /etc/profile.d/ai-tools.sh
  source /etc/profile.d/ai-tools.sh
  echo "環境變數設置完成。請重新登入或執行 'source /etc/profile.d/ai-tools.sh' 使設置生效。"
  echo "安裝完成!"
}
# Script entry point; forward any CLI arguments to main for future use.
main "$@"