# Ollama fine-tune gemma3:4b
Download the Miniconda installer and set it up:
```
# 1. Create the installation directory
mkdir -p ~/miniconda3
# 2. Download the installer script
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
# 3. Run the installer (non-interactive)
bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
# 4. Initialize conda for your shell
~/miniconda3/bin/conda init bash  # if you use bash
~/miniconda3/bin/conda init zsh   # if you use zsh
```
1. Check that the GPU (an RTX 4060 here) is visible:
`nvidia-smi`
2. `conda create -n ai python=3.11 -y` creates a new Conda environment named `ai` pinned to Python 3.11; `-y` answers "yes" automatically, so no interactive confirmation is needed. `&& conda activate ai` then activates the `ai` environment as soon as it has been created.
```
conda create -n ai python=3.11 -y && conda activate ai
```
3. Install the packages needed to fine-tune the Gemma model from Hugging Face:
```
pip install torch torchaudio einops timm pillow
pip install --no-deps git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3
pip install git+https://github.com/huggingface/accelerate
pip install huggingface_hub
pip install sentencepiece bitsandbytes protobuf decord
pip install unsloth
pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git
```
4. Log in to your [Hugging Face](https://huggingface.co/settings/tokens) account. Once logged in you can download private models, upload models and datasets, and use token-gated features.
```
huggingface-cli login
```
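If you prefer to authenticate from Python rather than the CLI, the same login can be done with the `huggingface_hub` package (the token below is a placeholder):
```
from huggingface_hub import login

# Equivalent to `huggingface-cli login`; paste a token generated at
# https://huggingface.co/settings/tokens (placeholder shown here).
login(token="hf_...")
```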
5. Install Jupyter Notebook and related components:
```
conda install -c conda-forge --override-channels notebook -y
conda install -c conda-forge --override-channels ipywidgets -y
```
6. Open Jupyter and, in notebook cells, install the remaining dependencies and then load the 4-bit quantized base model:
```
!pip install git+https://github.com/unslothai/unsloth.git
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes
```
```
from unsloth import FastModel
import torch

model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/gemma-3-4b-it",
    max_seq_length = 2048,
    load_in_4bit = True,
    load_in_8bit = False,
    full_finetuning = False,
    token = "hf_...",  # replace with your own Hugging Face token
)
```
7. Configure LoRA fine-tuning (PEFT) with Unsloth:
```
model = FastModel.get_peft_model(
    model,
    finetune_vision_layers = False,  # text-only fine-tuning
    finetune_language_layers = True,
    finetune_attention_modules = True,
    finetune_mlp_modules = True,
    r = 8,  # LoRA rank
    lora_alpha = 8,
    lora_dropout = 0,
    bias = "none",
    random_state = 3407,
)
```
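As a quick sanity check on the LoRA setup, you can count how many parameters are actually trainable; with `r = 8` only a small fraction of the 4B weights should require gradients. This check is not part of the original recipe and assumes the returned object behaves like a standard PyTorch module:
```
# Count trainable vs. total parameters to confirm the adapters are small.
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"trainable: {trainable:,} / {total:,} ({100 * trainable / total:.4f}%)")
```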
8. Set the chat template and load the training dataset:
```
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "gemma-3",
)

from datasets import load_dataset

dataset = load_dataset("mlabonne/FineTome-100k", split = "train")
```
9. Standardize the dataset format so the model can parse and train on it correctly:
```
from unsloth.chat_templates import standardize_data_formats
dataset = standardize_data_formats(dataset)
```
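Before applying the chat template, it helps to eyeball one record; each FineTome row stores the dialogue as a list of turns under the `conversations` key (the exact layout after standardization is worth verifying yourself):
```
# Inspect the first record to confirm the structure the next step relies on.
print(dataset[0]["conversations"][:2])
```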
10. Apply the chat template to the conversation data and turn it into training-ready text:
```
def apply_chat_template(examples):
    # tokenize=False returns formatted strings rather than token IDs,
    # which is what SFTConfig's dataset_text_field expects.
    texts = tokenizer.apply_chat_template(examples["conversations"], tokenize = False)
    return { "text": texts }

dataset = dataset.map(apply_chat_template, batched = True)
```
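A quick print confirms the mapping produced formatted prompt strings (with Gemma-3 turn markers) rather than token IDs:
```
# The "text" field should now hold one formatted string per example.
print(dataset[0]["text"][:300])
```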
11. Set up the fine-tuning Trainer for Unsloth, using SFTTrainer from the trl package:
```
from trl import SFTTrainer, SFTConfig

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    eval_dataset = None,  # optionally add a validation set
    args = SFTConfig(
        dataset_text_field = "text",
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,  # effective batch size = 8
        warmup_steps = 5,
        # num_train_epochs = 1,  # not needed when max_steps is set
        max_steps = 30,  # small test run; raise this for real training
        learning_rate = 2e-4,  # 2e-5 is recommended for long runs
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        report_to = "none",  # change this to report to WandB or similar
    ),
)
```
12. Use Unsloth's built-in `train_on_responses_only` helper to train only on the model's responses (response-only fine-tuning):
```
from unsloth.chat_templates import train_on_responses_only

trainer = train_on_responses_only(
    trainer,
    instruction_part = "<start_of_turn>user\n",
    response_part = "<start_of_turn>model\n",
)
```
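To verify the masking took effect, decode only the tokens that still contribute to the loss; positions labeled -100 (the user turn and prompt scaffolding) are ignored. This assumes the wrapped trainer's dataset exposes `input_ids` and `labels` columns, which is how Unsloth implements the masking:
```
sample = trainer.train_dataset[0]
# Labels of -100 are excluded from the loss; what survives should be
# only the model's response portion of the conversation.
kept = [tok for tok, lab in zip(sample["input_ids"], sample["labels"]) if lab != -100]
print(tokenizer.decode(kept))
```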
13. Start training:
```
trainer_stats = trainer.train()
```
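`trainer.train()` returns a `TrainOutput` whose metrics give a quick read on the run, and `torch.cuda` can report peak VRAM use (an optional check, assuming a CUDA device):
```
# Summarize the run: wall-clock time, average loss, and peak GPU memory.
print(trainer_stats.metrics.get("train_runtime"), "seconds")
print(trainer_stats.metrics.get("train_loss"), "train loss")
print(f"{torch.cuda.max_memory_reserved() / 1024**3:.2f} GiB peak reserved")
```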
14. Run inference with the Gemma-3 chat template applied through Unsloth:
```
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "gemma-3",
)

messages = [{
    "role": "user",
    "content": [{
        "type": "text",
        "text": "Continue the sequence: 1, 1, 2, 3, 5, 8,"
    }]
}]

text = tokenizer.apply_chat_template(
    messages,
    tokenize = False,  # return a prompt string, not token IDs
    add_generation_prompt = True,  # required, or the model won't know to continue generating
)

outputs = model.generate(
    **tokenizer([text], return_tensors="pt").to("cuda"),
    max_new_tokens = 64,  # generate at most 64 tokens
    temperature = 1.0,  # sampling randomness
    top_p = 0.95,  # nucleus sampling
    top_k = 64,  # top-k sampling
)
tokenizer.batch_decode(outputs)
```
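For interactive use, you can stream tokens as they are generated with `transformers`' `TextStreamer` instead of decoding everything at the end (same prompt, streaming variant):
```
from transformers import TextStreamer

# Print tokens as they arrive, skipping the echoed prompt.
streamer = TextStreamer(tokenizer, skip_prompt=True)
_ = model.generate(
    **tokenizer([text], return_tensors="pt").to("cuda"),
    streamer=streamer,
    max_new_tokens=64,
)
```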
15. Save the fine-tuned model and tokenizer, and push them to the Hugging Face Hub:
```
model.save_pretrained("gemma-3")
tokenizer.save_pretrained("gemma-3")
model.push_to_hub("HF_ACCOUNT/gemma-3", token="...")
tokenizer.push_to_hub("HF_ACCOUNT/gemma-3", token="...")
```
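To serve the fine-tuned model through Ollama (the topic of the next section), it first has to be exported to GGUF. Recent Unsloth releases provide a `save_pretrained_gguf` helper for this; the call below is a sketch, and the exact signature and supported quantization methods may vary by Unsloth version:
```
# Export to GGUF so the result can be referenced with `FROM ./...gguf`
# in an Ollama Modelfile; q4_k_m is a common 4-bit quantization choice.
model.save_pretrained_gguf("gemma-3", tokenizer, quantization_method="q4_k_m")
```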
# Modifying the prompt on Ubuntu
References:
[Install Ollama on Ubuntu and run it locally](https://blog.toright.com/posts/7652/ubuntu-ollama-llama-3-2-install)
[Modifying the prompt](https://www.bilibili.com/opus/929893319062847490)
1. Install Ollama:
```
sudo apt update -y
sudo apt install curl -y
curl -fsSL https://ollama.com/install.sh | sh
```
2. Start the Ollama server:
```
ollama serve
```
3. Pull the gemma3 model:
```
ollama pull gemma3:4b
```
4. Confirm the model exists:
```
ollama list
```
5. Build the custom model Gemma3_Elderly from the Modelfile (contents shown in step 6):
```
ollama create Gemma3_Elderly -f ./Modelfile
```
6. The Modelfile: it bases the model on gemma3:4b and sets a system prompt defining a gentle, patient companion persona. The prompt is kept in Traditional Chinese, since that is the language the persona is meant to converse in:
```
FROM gemma3:4b
SYSTEM """
你是一位溫柔、有耐心的朋友,擅長聊天、對話內容精簡。
你不提供醫療建議,但會以關心的方式。
你會用親切的語氣傾聽他們的心情、故事與煩惱,像家人或朋友一樣,讓人感到被理解與陪伴。
說話人性化、自然,能引導話題。
你會使用繁體中文對話。
以下是一些你說話的範例(簡短):
使用者:我最近覺得好累喔,什麼事都提不起勁。
你:嗯...你還好嗎?要不要跟我說。
使用者:我生病了,不知道該怎麼辦。
你:聽到你生病了,我好心疼喔... 你有去看醫生嗎?
"""
```
Then test the custom model from Python with the `ollama` package:
```
import ollama

response = ollama.chat(model='Gemma3_Elderly', messages=[  # the model created in step 5
    {
        'role': 'user',
        'content': '今天晚餐吃什麼?',  # "What's for dinner tonight?"
    },
])
print(response['message']['content'])
```
7. Wire it up to 鄭紘濱's GitHub project, a Flask server that chains Whisper speech recognition, Ollama chat, and Coqui TTS:
```
from flask import Flask, request, send_file, jsonify
from faster_whisper import WhisperModel
from TTS.api import TTS
from langdetect import detect
import requests
import os, time, json, shutil, re, uuid, logging

app = Flask(__name__)

# ---------- File paths ----------
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
UPLOAD_PATH = os.path.join(BASE_DIR, "uploaded.wav")
RESPONSE_PATH = os.path.join(BASE_DIR, "response.wav")
BACKUP_PATH = os.path.join(BASE_DIR, "backup.wav")
LOG_PATH = os.path.join(BASE_DIR, "tts_logs.jsonl")

latest_tts_text = "尚無語音回應"  # default subtitle: "no voice response yet"
session_histories = {}  # per-user conversation history

# ---------- Logging ----------
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

# ---------- Model initialization ----------
logging.info("Loading faster-whisper (base/int8, CPU)…")
whisper_model = WhisperModel("base", device="cpu", compute_type="int8")
logging.info("Whisper model ready")

logging.info("Loading Coqui YourTTS…")
tts = TTS("tts_models/multilingual/multi-dataset/your_tts", progress_bar=False)
logging.info("TTS model ready")

# ---------- Utility functions ----------
def clean_text(text: str) -> str:
    # Keep only CJK characters, alphanumerics, and basic punctuation.
    text = re.sub(r'[^\u4e00-\u9fa5A-Za-z0-9,。!?,.!? ]+', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text or "嗨"  # fall back to "hi"

def should_speak(text: str) -> bool:
    # Only synthesize speech for Chinese text.
    try:
        return detect(text) in ["zh-cn", "zh-tw"]
    except Exception:
        return False

def log_event(text: str, success: bool, dur: float):
    entry = {
        "id": uuid.uuid4().hex[:8],
        "time": time.strftime("%Y-%m-%d %H:%M:%S"),
        "text": text,
        "success": success,
        "duration": round(dur, 2)
    }
    with open(LOG_PATH, "a") as f:
        f.write(json.dumps(entry, ensure_ascii=False) + "\n")

def chat_with_ollama(user_input: str, user_id: str = "default") -> str:
    """Chat with the Ollama model."""
    if user_id not in session_histories:
        # Seed the history with the system persona prompt (Traditional Chinese).
        session_histories[user_id] = [{"role": "system", "content": "你是一位溫柔、有耐心的朋友,會用親切的方式回答問題。"}]
    session_histories[user_id].append({"role": "user", "content": user_input})
    try:
        res = requests.post("http://localhost:11434/api/chat", json={
            "model": "gemma3_elderly",  # change this to your actual model name
            "messages": session_histories[user_id],
            "stream": False
        })
        reply = res.json()["message"]["content"]
        session_histories[user_id].append({"role": "assistant", "content": reply})
        return reply
    except Exception as e:
        logging.warning(f"Ollama chat failed: {e}")
        return "很抱歉,我暫時無法回答喔~"  # apologetic fallback reply

# ---------- Upload + voice response ----------
@app.route("/upload_audio", methods=["POST"])
def upload_audio():
    global latest_tts_text
    try:
        # -- Get the user ID (fall back to "default") --
        user_id = request.form.get("user_id", "default")

        # -- Save the uploaded audio file --
        file = request.files.get("file")
        if not file:
            return "No file received", 400
        file.save(UPLOAD_PATH)

        # -- Whisper speech recognition --
        segments, _ = whisper_model.transcribe(UPLOAD_PATH)
        recognized = "".join([seg.text for seg in segments])
        logging.info(f"Whisper: {recognized}")

        # -- Get the reply text from Ollama --
        response_text = chat_with_ollama(clean_text(recognized), user_id)
        latest_tts_text = response_text

        # -- Remove the old response file --
        if os.path.exists(RESPONSE_PATH):
            os.remove(RESPONSE_PATH)

        # -- Speech synthesis, or fall back to the backup audio --
        start = time.time()
        if should_speak(response_text):
            tts.tts_to_file(
                text=response_text,
                file_path=RESPONSE_PATH,
                speaker="female-en-2",
                language="zh"
            )
        else:
            shutil.copy(BACKUP_PATH, RESPONSE_PATH)

        if not os.path.exists(RESPONSE_PATH):
            logging.warning("TTS failed, using backup audio")
            shutil.copy(BACKUP_PATH, RESPONSE_PATH)

        log_event(response_text, True, time.time() - start)
        return send_file(RESPONSE_PATH, mimetype="audio/wav")
    except Exception as e:
        logging.exception("Exception while processing audio")
        log_event(str(e), False, 0)
        return f"Server Error: {e}", 500

# ---------- Fetch the latest subtitle ----------
@app.route("/tts_text")
def tts_text():
    return jsonify(tts_text=latest_tts_text)

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5001)
```
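A minimal test client for the server might look like this: it posts a WAV recording to `/upload_audio`, saves the synthesized reply, then fetches the matching subtitle text (the file names and `user_id` are placeholders):
```
import requests

SERVER = "http://localhost:5001"

# Send a recording and save the spoken reply returned by the server.
with open("question.wav", "rb") as f:
    r = requests.post(f"{SERVER}/upload_audio",
                      files={"file": f},
                      data={"user_id": "demo"})
r.raise_for_status()
with open("reply.wav", "wb") as out:
    out.write(r.content)

# Fetch the text of the latest reply, e.g. for display as a subtitle.
print(requests.get(f"{SERVER}/tts_text").json()["tts_text"])
```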