# Ollama fine-tune gemma3:4b
Download the Miniconda installer and set it up:
```
# 1. Create the installation directory
mkdir -p ~/miniconda3
# 2. Download the installer script
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh
# 3. Run the installer (non-interactive)
bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3
# 4. Initialize conda for your shell
~/miniconda3/bin/conda init bash  # if you use bash
~/miniconda3/bin/conda init zsh   # if you use zsh
```
1. Check that the GPU (an RTX 4060 here) is visible:
`nvidia-smi`
2. `conda create -n ai python=3.11 -y` creates a new Conda environment named `ai` pinned to Python 3.11; `-y` answers "yes" automatically, so no interactive confirmation is needed. `&& conda activate ai` then activates the `ai` environment as soon as it has been created.
```
conda create -n ai python=3.11 -y && conda activate ai
```
3. Install the packages needed to fine-tune the Gemma model from Hugging Face:
```
pip install torch torchaudio einops timm pillow
pip install --no-deps git+https://github.com/huggingface/transformers@v4.49.0-Gemma-3
pip install git+https://github.com/huggingface/accelerate
pip install huggingface_hub
pip install sentencepiece bitsandbytes protobuf decord
pip install unsloth
pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git
```
4. Log in to your [Hugging Face](https://huggingface.co/settings/tokens) account. Once logged in you can download private models, upload models and datasets, and use token-gated features.
```
huggingface-cli login
```
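If you prefer to authenticate from Python rather than the CLI, the same login can be done with the `huggingface_hub` package (the token below is a placeholder):
```
from huggingface_hub import login

# Equivalent to `huggingface-cli login`; paste a token generated at
# https://huggingface.co/settings/tokens (placeholder shown here).
login(token="hf_...")
```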
5. Install Jupyter Notebook and related components:
```
conda install -c conda-forge --override-channels notebook -y
conda install -c conda-forge --override-channels ipywidgets -y
```
6. Open Jupyter and, in notebook cells, install the remaining dependencies and then load the 4-bit quantized base model:
```
!pip install git+https://github.com/unslothai/unsloth.git
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes
```
```
from unsloth import FastModel
import torch

model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/gemma-3-4b-it",
    max_seq_length = 2048,
    load_in_4bit = True,
    load_in_8bit = False,
    full_finetuning = False,
    token = "hf_...",  # replace with your own Hugging Face token
)
```
7. Configure LoRA fine-tuning (PEFT) with Unsloth:
```
model = FastModel.get_peft_model(
    model,
    finetune_vision_layers = False,  # text-only fine-tuning
    finetune_language_layers = True,
    finetune_attention_modules = True,
    finetune_mlp_modules = True,
    r = 8,  # LoRA rank
    lora_alpha = 8,
    lora_dropout = 0,
    bias = "none",
    random_state = 3407,
)
```
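As a quick sanity check on the LoRA setup, you can count how many parameters are actually trainable; with `r = 8` only a small fraction of the 4B weights should require gradients. This check is not part of the original recipe and assumes the returned object behaves like a standard PyTorch module:
```
# Count trainable vs. total parameters to confirm the adapters are small.
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"trainable: {trainable:,} / {total:,} ({100 * trainable / total:.4f}%)")
```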
8. Set the chat template and load the training dataset:
```
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "gemma-3",
)

from datasets import load_dataset

dataset = load_dataset("mlabonne/FineTome-100k", split = "train")
```
9. Standardize the dataset format so the model can parse and train on it correctly:
```
from unsloth.chat_templates import standardize_data_formats
dataset = standardize_data_formats(dataset)
```
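Before applying the chat template, it helps to eyeball one record; each FineTome row stores the dialogue as a list of turns under the `conversations` key (the exact layout after standardization is worth verifying yourself):
```
# Inspect the first record to confirm the structure the next step relies on.
print(dataset[0]["conversations"][:2])
```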
10. Apply the chat template to the conversation data and turn it into training-ready text:
```
def apply_chat_template(examples):
    # tokenize=False returns formatted strings rather than token IDs,
    # which is what SFTConfig's dataset_text_field expects.
    texts = tokenizer.apply_chat_template(examples["conversations"], tokenize = False)
    return { "text": texts }

dataset = dataset.map(apply_chat_template, batched = True)
```
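A quick print confirms the mapping produced formatted prompt strings (with Gemma-3 turn markers) rather than token IDs:
```
# The "text" field should now hold one formatted string per example.
print(dataset[0]["text"][:300])
```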
11. Set up the fine-tuning Trainer for Unsloth, using SFTTrainer from the trl package:
```
from trl import SFTTrainer, SFTConfig

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    eval_dataset = None,  # optionally add a validation set
    args = SFTConfig(
        dataset_text_field = "text",
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,  # effective batch size = 8
        warmup_steps = 5,
        # num_train_epochs = 1,  # not needed when max_steps is set
        max_steps = 30,  # small test run; raise this for real training
        learning_rate = 2e-4,  # 2e-5 is recommended for long runs
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        report_to = "none",  # change this to report to WandB or similar
    ),
)
```
12. Use Unsloth's built-in `train_on_responses_only` helper to train only on the model's responses (response-only fine-tuning):
```
from unsloth.chat_templates import train_on_responses_only

trainer = train_on_responses_only(
    trainer,
    instruction_part = "<start_of_turn>user\n",
    response_part = "<start_of_turn>model\n",
)
```
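To verify the masking took effect, decode only the tokens that still contribute to the loss; positions labeled -100 (the user turn and prompt scaffolding) are ignored. This assumes the wrapped trainer's dataset exposes `input_ids` and `labels` columns, which is how Unsloth implements the masking:
```
sample = trainer.train_dataset[0]
# Labels of -100 are excluded from the loss; what survives should be
# only the model's response portion of the conversation.
kept = [tok for tok, lab in zip(sample["input_ids"], sample["labels"]) if lab != -100]
print(tokenizer.decode(kept))
```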
13. Start training:
```
trainer_stats = trainer.train()
```
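`trainer.train()` returns a `TrainOutput` whose metrics give a quick read on the run, and `torch.cuda` can report peak VRAM use (an optional check, assuming a CUDA device):
```
# Summarize the run: wall-clock time, average loss, and peak GPU memory.
print(trainer_stats.metrics.get("train_runtime"), "seconds")
print(trainer_stats.metrics.get("train_loss"), "train loss")
print(f"{torch.cuda.max_memory_reserved() / 1024**3:.2f} GiB peak reserved")
```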
14. Run inference with the Gemma-3 chat template applied through Unsloth:
```
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "gemma-3",
)

messages = [{
    "role": "user",
    "content": [{
        "type": "text",
        "text": "Continue the sequence: 1, 1, 2, 3, 5, 8,"
    }]
}]

text = tokenizer.apply_chat_template(
    messages,
    tokenize = False,  # return a prompt string, not token IDs
    add_generation_prompt = True,  # required, or the model won't know to continue generating
)

outputs = model.generate(
    **tokenizer([text], return_tensors="pt").to("cuda"),
    max_new_tokens = 64,  # generate at most 64 tokens
    temperature = 1.0,  # sampling randomness
    top_p = 0.95,  # nucleus sampling
    top_k = 64,  # top-k sampling
)
tokenizer.batch_decode(outputs)
```
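For interactive use, you can stream tokens as they are generated with `transformers`' `TextStreamer` instead of decoding everything at the end (same prompt, streaming variant):
```
from transformers import TextStreamer

# Print tokens as they arrive, skipping the echoed prompt.
streamer = TextStreamer(tokenizer, skip_prompt=True)
_ = model.generate(
    **tokenizer([text], return_tensors="pt").to("cuda"),
    streamer=streamer,
    max_new_tokens=64,
)
```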
15. Save the fine-tuned model and tokenizer, and push them to the Hugging Face Hub:
```
model.save_pretrained("gemma-3")
tokenizer.save_pretrained("gemma-3")
model.push_to_hub("HF_ACCOUNT/gemma-3", token="...")
tokenizer.push_to_hub("HF_ACCOUNT/gemma-3", token="...")
```
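To serve the fine-tuned model through Ollama (the topic of the next section), it first has to be exported to GGUF. Recent Unsloth releases provide a `save_pretrained_gguf` helper for this; the call below is a sketch, and the exact signature and supported quantization methods may vary by Unsloth version:
```
# Export to GGUF so the result can be referenced with `FROM ./...gguf`
# in an Ollama Modelfile; q4_k_m is a common 4-bit quantization choice.
model.save_pretrained_gguf("gemma-3", tokenizer, quantization_method="q4_k_m")
```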
# Modifying the prompt on Ubuntu
References:
[Install Ollama on Ubuntu and run it locally](https://blog.toright.com/posts/7652/ubuntu-ollama-llama-3-2-install)
[Modifying the prompt](https://www.bilibili.com/opus/929893319062847490)
1. Install Ollama:
```
sudo apt update -y
sudo apt install curl -y
curl -fsSL https://ollama.com/install.sh | sh
```
2. Start the Ollama server:
```
ollama serve
```
3. Pull the gemma3 model:
```
ollama pull gemma3:4b
```
4. Confirm the model exists:
```
ollama list
```
5. Build the custom model Gemma3_Elderly from the Modelfile (contents shown in step 6):
```
ollama create Gemma3_Elderly -f ./Modelfile
```
6. The Modelfile: it bases the model on gemma3:4b and sets a system prompt defining a gentle, patient companion persona. The prompt is kept in Traditional Chinese, since that is the language the persona is meant to converse in:
```
FROM gemma3:4b
SYSTEM """
你是一位溫柔、有耐心的朋友,擅長聊天、對話內容精簡。
你不提供醫療建議,但會以關心的方式。
你會用親切的語氣傾聽他們的心情、故事與煩惱,像家人或朋友一樣,讓人感到被理解與陪伴。
說話人性化、自然,能引導話題。
你會使用繁體中文對話。
以下是一些你說話的範例(簡短):
使用者:我最近覺得好累喔,什麼事都提不起勁。
你:嗯...你還好嗎?要不要跟我說。
使用者:我生病了,不知道該怎麼辦。
你:聽到你生病了,我好心疼喔... 你有去看醫生嗎?
"""
```
Then test the custom model from Python with the `ollama` package:
```
import ollama

response = ollama.chat(model='Gemma3_Elderly', messages=[  # the model created in step 5
    {
        'role': 'user',
        'content': '今天晚餐吃什麼?',  # "What's for dinner tonight?"
    },
])
print(response['message']['content'])
```
7. Wire it up to 鄭紘濱's GitHub project, a Flask server that chains Whisper speech recognition, Ollama chat, and Coqui TTS:
```
from flask import Flask, request, send_file, jsonify
from faster_whisper import WhisperModel
from TTS.api import TTS
from langdetect import detect
import requests
import os, time, json, shutil, re, uuid, logging

app = Flask(__name__)

# ---------- File paths ----------
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
UPLOAD_PATH = os.path.join(BASE_DIR, "uploaded.wav")
RESPONSE_PATH = os.path.join(BASE_DIR, "response.wav")
BACKUP_PATH = os.path.join(BASE_DIR, "backup.wav")
LOG_PATH = os.path.join(BASE_DIR, "tts_logs.jsonl")

latest_tts_text = "尚無語音回應"  # default subtitle: "no voice response yet"
session_histories = {}  # per-user conversation history

# ---------- Logging ----------
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

# ---------- Model initialization ----------
logging.info("Loading faster-whisper (base/int8, CPU)…")
whisper_model = WhisperModel("base", device="cpu", compute_type="int8")
logging.info("Whisper model ready")

logging.info("Loading Coqui YourTTS…")
tts = TTS("tts_models/multilingual/multi-dataset/your_tts", progress_bar=False)
logging.info("TTS model ready")

# ---------- Utility functions ----------
def clean_text(text: str) -> str:
    # Keep only CJK characters, alphanumerics, and basic punctuation.
    text = re.sub(r'[^\u4e00-\u9fa5A-Za-z0-9,。!?,.!? ]+', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text or "嗨"  # fall back to "hi"

def should_speak(text: str) -> bool:
    # Only synthesize speech for Chinese text.
    try:
        return detect(text) in ["zh-cn", "zh-tw"]
    except Exception:
        return False

def log_event(text: str, success: bool, dur: float):
    entry = {
        "id": uuid.uuid4().hex[:8],
        "time": time.strftime("%Y-%m-%d %H:%M:%S"),
        "text": text,
        "success": success,
        "duration": round(dur, 2)
    }
    with open(LOG_PATH, "a") as f:
        f.write(json.dumps(entry, ensure_ascii=False) + "\n")

def chat_with_ollama(user_input: str, user_id: str = "default") -> str:
    """Chat with the Ollama model."""
    if user_id not in session_histories:
        # Seed the history with the system persona prompt (Traditional Chinese).
        session_histories[user_id] = [{"role": "system", "content": "你是一位溫柔、有耐心的朋友,會用親切的方式回答問題。"}]
    session_histories[user_id].append({"role": "user", "content": user_input})
    try:
        res = requests.post("http://localhost:11434/api/chat", json={
            "model": "gemma3_elderly",  # change this to your actual model name
            "messages": session_histories[user_id],
            "stream": False
        })
        reply = res.json()["message"]["content"]
        session_histories[user_id].append({"role": "assistant", "content": reply})
        return reply
    except Exception as e:
        logging.warning(f"Ollama chat failed: {e}")
        return "很抱歉,我暫時無法回答喔~"  # apologetic fallback reply

# ---------- Upload + voice response ----------
@app.route("/upload_audio", methods=["POST"])
def upload_audio():
    global latest_tts_text
    try:
        # -- Get the user ID (fall back to "default") --
        user_id = request.form.get("user_id", "default")

        # -- Save the uploaded audio file --
        file = request.files.get("file")
        if not file:
            return "No file received", 400
        file.save(UPLOAD_PATH)

        # -- Whisper speech recognition --
        segments, _ = whisper_model.transcribe(UPLOAD_PATH)
        recognized = "".join([seg.text for seg in segments])
        logging.info(f"Whisper: {recognized}")

        # -- Get the reply text from Ollama --
        response_text = chat_with_ollama(clean_text(recognized), user_id)
        latest_tts_text = response_text

        # -- Remove the old response file --
        if os.path.exists(RESPONSE_PATH):
            os.remove(RESPONSE_PATH)

        # -- Speech synthesis, or fall back to the backup audio --
        start = time.time()
        if should_speak(response_text):
            tts.tts_to_file(
                text=response_text,
                file_path=RESPONSE_PATH,
                speaker="female-en-2",
                language="zh"
            )
        else:
            shutil.copy(BACKUP_PATH, RESPONSE_PATH)

        if not os.path.exists(RESPONSE_PATH):
            logging.warning("TTS failed, using backup audio")
            shutil.copy(BACKUP_PATH, RESPONSE_PATH)

        log_event(response_text, True, time.time() - start)
        return send_file(RESPONSE_PATH, mimetype="audio/wav")
    except Exception as e:
        logging.exception("Exception while processing audio")
        log_event(str(e), False, 0)
        return f"Server Error: {e}", 500

# ---------- Fetch the latest subtitle ----------
@app.route("/tts_text")
def tts_text():
    return jsonify(tts_text=latest_tts_text)

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5001)
```
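A minimal test client for the server might look like this: it posts a WAV recording to `/upload_audio`, saves the synthesized reply, then fetches the matching subtitle text (the file names and `user_id` are placeholders):
```
import requests

SERVER = "http://localhost:5001"

# Send a recording and save the spoken reply returned by the server.
with open("question.wav", "rb") as f:
    r = requests.post(f"{SERVER}/upload_audio",
                      files={"file": f},
                      data={"user_id": "demo"})
r.raise_for_status()
with open("reply.wav", "wb") as out:
    out.write(r.content)

# Fetch the text of the latest reply, e.g. for display as a subtitle.
print(requests.get(f"{SERVER}/tts_text").json()["tts_text"])
```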