# Preface
MediaTek-Research's Llama-Breeze2-8B-Instruct is further pretrained on top of Llama 3.2, which means it can understand images, and of course Traditional Chinese. However, it cannot be mounted and run for inference directly with the huggingface executor on the Kuwa GenAI OS (v0.3.4) platform.
# Method
Without touching ``C:\kuwa\GenAI OS\src\executor\huggingface.py`` on the Kuwa GenAI OS (v0.3.4) platform, I looked for another way to run the model on the platform.
## Install the required packages
If your network environment requires a proxy to reach the Internet, set the proxy first.
```bash
set "https_proxy=http://proxy.XXXX.com.tw:3128"
set "http_proxy=http://proxy.XXXX.com.tw:3128"
set "no_proxy=192.168.1.1,127.0.0.1,localhost"
```
Install the packages needed to run Breeze2. If you run the demo directly in the Kuwa GenAI OS (v0.3.4) Python chatroom, you can get away with installing fewer of them. To switch Kuwa GenAI OS into command-line mode, run C:\kuwa\GenAI OS\windows\tool.bat and enter cmd (it sets up the environment variables for you, which matters a lot; for example, even without Git installed yourself you still get the relevant environment variables), then run the commands below.
```bash
pip3 install transformers==4.47.0
pip3 install -U mtkresearch
pip install Pillow
# torch/torchvision/torchaudio: install once, pinned to the CUDA 12.1 builds
pip install torch==2.3.0+cu121 torchvision==0.18.0+cu121 torchaudio==2.3.0+cu121 -f https://download.pytorch.org/whl/torch_stable.html
pip install timm einops
```
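A quick way to confirm that the CUDA build of PyTorch landed correctly before loading an 8B model:
```python
import torch

# Expect something like "2.3.0+cu121", and True if the GPU is visible to PyTorch.
print(torch.__version__)
print(torch.cuda.is_available())
```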
## Running Breeze2 directly in the Kuwa GenAI OS (v0.3.4) Python chatroom
Copy the Breeze2 demo code from its Hugging Face page, paste it into the message input box of the Kuwa GenAI OS (v0.3.4) Python chatroom, and send it for execution. A minimal sketch of that demo code follows.
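For reference, a text-only sketch of what the demo code boils down to, condensed from the ``BreezeAPI.py`` listing further below so the parameter values match; the question string is just an example:
```python
import torch
from transformers import AutoModel, AutoTokenizer, GenerationConfig
from mtkresearch.llm.prompt import MRPromptV3

model_id = 'MediaTek-Research/Llama-Breeze2-8B-Instruct-v0_1'
model = AutoModel.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True,
    trust_remote_code=True, device_map='auto',
    img_context_token_id=128212).eval()
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, use_fast=False)

# MRPromptV3 turns an OpenAI-style conversation into Breeze2's prompt format.
prompt_engine = MRPromptV3()
conversations = [
    {"role": "system", "content": "You are a helpful AI assistant built by MediaTek Research. The user you are helping speaks Traditional Chinese and comes from Taiwan."},
    {"role": "user", "content": "請用繁體中文介紹你自己。"},
]
prompt = prompt_engine.get_prompt(conversations)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, generation_config=GenerationConfig(
    max_new_tokens=128, do_sample=True, temperature=0.01, top_p=0.01,
    repetition_penalty=1.1, eos_token_id=128009))
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```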

The output appears in the command prompt window.

## Thoughts
Let me record my own humble take on solving this.
Not going down a path doesn't mean I've never walked it.
In an environment where what counts is seniority, not expertise,
could any of you walk the road I once walked?
### Modifying the existing `huggingface.py`
Since the Kuwa GenAI OS (v0.3.4) Python chatroom can run the Breeze2 demo code, I did consider modifying ``C:\kuwa\GenAI OS\src\executor\huggingface.py`` on the platform, but it felt like pulling one thread would unravel the whole thing, with no end to the debugging, so I looked for a simpler approach instead.
I also tried having Perplexity merge my changes into ``huggingface.py``, but Chinese is hard: from its responses it was clear that Perplexity and I were talking past each other. Even conversations between two people can fall on deaf ears, let alone a conversation with a generative AI.

### An API compatible with ollama or OpenAI
I asked Perplexity to generate an API server compatible with the ollama or OpenAI calling conventions.

The idea sounded great, but on Kuwa GenAI OS (v0.3.4) both the ollama and the chatgpt executors raised errors against it.

### Running it as a Kuwa GenAI OS Tool
Create a Tool named ``Breeze2.py``. The API-calling code generated by Perplexity is shown below:
```python
import fileinput
from openai import OpenAI

def is_image_url(line):
    # Treat a line as an image if it is an http(s) URL ending in an image extension.
    image_exts = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff', '.svg')
    return (
        (line.startswith("http://") or line.startswith("https://")) and
        line.lower().endswith(image_exts)
    )

# Read the chat message from stdin; Kuwa's Pipe feeds the user input this way.
contents = []
for line in fileinput.input():
    line = line.strip()
    if not line:
        continue
    if is_image_url(line):
        contents.append({"type": "image_url", "image_url": {"url": line}})
    else:
        contents.append({"type": "text", "text": line})

messages = [
    {"role": "system", "content": "You are a helpful AI assistant built by MediaTek Research. The user you are helping speaks Traditional Chinese and comes from Taiwan."},
    {
        "role": "user",
        "content": contents
    }
]

# Talk to the local BreezeAPI.py server through the OpenAI client.
client = OpenAI(
    base_url="http://127.0.0.1:6666/v1",
    api_key="sk-anything"
)
response = client.chat.completions.create(
    model="llama-breeze2",
    messages=messages
)
print(response.choices[0].message.content)
```
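As a quick local check (assuming the ``BreezeAPI.py`` server described below is already listening on port 6666), the script can be fed a message through stdin, mimicking what Kuwa's Pipe does; a hypothetical test:
```python
import subprocess

# Pipe a one-line question into Breeze2.py, the same way Kuwa's Pipe feeds chat input.
out = subprocess.run(
    ["python", "Breeze2.py"],
    input="什麼是深度學習?\n",
    capture_output=True,
    text=True,
)
print(out.stdout)
```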
Place ``Breeze2.py`` in the ``C:\kuwa\GenAI OS\windows\root\bin`` directory.

In the Kuwa GenAI OS store, create a Bot whose model configuration file reads as follows (the Pipe runs ``python Breeze2.py`` and feeds each chat message to the script's stdin):
```
PARAMETER pipe_program python
PARAMETER pipe_argv Breeze2.py
```

Create ``BreezeAPI.py`` and paste in the ollama/OpenAI-compatible API code that Perplexity generated earlier.
```python
import time
import json
import os
import torch
import tempfile
import requests
from flask import Flask, request, jsonify
from transformers import AutoModel, AutoTokenizer, GenerationConfig
from mtkresearch.llm.prompt import MRPromptV3

app = Flask(__name__)

def fetch_image_from_url(url):
    """Download an image and return the path to a local temp file."""
    resp = requests.get(url)
    resp.raise_for_status()
    suffix = os.path.splitext(url)[-1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(resp.content)
    return tmp.name

class Breeze2API:
    def __init__(self, model_id='MediaTek-Research/Llama-Breeze2-8B-Instruct-v0_1'):
        self.model_id = model_id
        self.prompt_engine = MRPromptV3()
        self.special_tokens = {
            'img_context_token_id': 128212,
            'eos_token_id': 128009
        }
        self.generation_config = GenerationConfig(
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.01,
            top_p=0.01,
            repetition_penalty=1.1,
            eos_token_id=self.special_tokens['eos_token_id']
        )
        self._load_model_and_tokenizer()

    def _load_model_and_tokenizer(self):
        self.model = AutoModel.from_pretrained(
            self.model_id,
            torch_dtype=torch.bfloat16,
            low_cpu_mem_usage=True,
            trust_remote_code=True,
            # device_map='auto',
            device_map={'': 'cuda:0'},
            img_context_token_id=self.special_tokens['img_context_token_id']
        ).eval()
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_id,
            trust_remote_code=True,
            use_fast=False
        )

    def _prepare_prompt_and_pixel_values(self, messages):
        # Convert OpenAI Vision-style messages into the Breeze2 format.
        conversations = []
        has_image = False
        for msg in messages:
            role = msg.get("role")
            content = msg.get("content")
            # If content is a list, handle the multimodal parts.
            if isinstance(content, list):
                new_content = []
                for item in content:
                    if item.get("type") == "image_url" and "image_url" in item:
                        url = item["image_url"]["url"]
                        local_path = fetch_image_from_url(url)
                        new_content.append({"type": "image", "image_path": local_path})
                        has_image = True
                    elif item.get("type") == "image" and "image_path" in item:
                        new_content.append(item)
                        has_image = True
                    elif item.get("type") == "text" and "text" in item:
                        new_content.append(item)
                    else:
                        # Ignore unknown item types.
                        pass
                conversations.append({"role": role, "content": new_content})
            else:
                conversations.append({"role": role, "content": content})
        if has_image:
            prompt, pixel_values = self.prompt_engine.get_prompt(conversations)
            return prompt, pixel_values
        else:
            prompt = self.prompt_engine.get_prompt(conversations)
            return prompt, None

    def _inference(self, prompt, pixel_values=None):
        inputs = self.tokenizer(prompt, return_tensors="pt")
        for k in inputs:
            inputs[k] = inputs[k].to(self.model.device)
        if pixel_values is not None:
            pixel_values = pixel_values.to(self.model.device, dtype=self.model.dtype)
            output_tensors = self.model.generate(
                **inputs,
                generation_config=self.generation_config,
                pixel_values=pixel_values
            )
        else:
            output_tensors = self.model.generate(
                **inputs,
                generation_config=self.generation_config
            )
        output_str = self.tokenizer.decode(output_tensors[0], skip_special_tokens=True)
        return output_str

    def chat(self, messages, functions=None):
        prompt, pixel_values = self._prepare_prompt_and_pixel_values(messages)
        if functions:
            prompt_with_func = self.prompt_engine.get_prompt(messages, functions=functions)
            output_str = self._inference(prompt_with_func)
            result = self.prompt_engine.parse_generated_str(output_str)
            if 'tool_calls' in result:
                tool_call = result['tool_calls'][0]
                func_name = tool_call['function']['name']
                func_args = json.loads(tool_call['function']['arguments'])
                func_result = self._execute_function(func_name, func_args)
                messages.append(result)
                messages.append({
                    'role': 'tool',
                    'tool_call_id': tool_call['id'],
                    'name': func_name,
                    'content': json.dumps(func_result)
                })
                prompt_final = self.prompt_engine.get_prompt(messages, functions=functions)
                output_str_final = self._inference(prompt_final)
                final_result = self.prompt_engine.parse_generated_str(output_str_final)
                return final_result
            else:
                return result
        else:
            output_str = self._inference(prompt, pixel_values=pixel_values)
            result = self.prompt_engine.parse_generated_str(output_str)
            return result

    def _execute_function(self, func_name, arguments):
        if func_name == 'get_current_weather':
            location = arguments.get('location')
            unit = arguments.get('unit', 'celsius')
            return {'temperature': 30, 'location': location, 'unit': unit}
        else:
            raise NotImplementedError(f"Function {func_name} is not implemented")

breeze2_api = Breeze2API()

@app.route("/v1/chat/completions", methods=["POST"])
def chat_completions():
    # OpenAI-style authentication (optional: checks only the header format, not the key).
    auth = request.headers.get("Authorization", "")
    if not auth.startswith("Bearer "):
        return jsonify({
            "error": {
                "message": "Invalid authentication credentials",
                "type": "invalid_request_error",
                "param": None,
                "code": "401"
            }
        }), 401
    data = request.json
    messages = data.get("messages", [])
    functions = data.get("functions", None)
    try:
        result = breeze2_api.chat(messages, functions=functions)
    except Exception as e:
        return jsonify({
            "error": {
                "message": str(e),
                "type": "internal_error",
                "param": None,
                "code": "500"
            }
        }), 500
    response = {
        "id": "chatcmpl-001",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": data.get("model", "llama-breeze2"),
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": result.get("role", "assistant"),
                    "content": result.get("content", ""),
                    "function_call": result.get("tool_calls", None)
                },
                "finish_reason": "stop"
            }
        ]
    }
    return jsonify(response)

if __name__ == "__main__":
    app.run(host="127.0.0.1", port=6666)
```
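Once the server is started with ``python BreezeAPI.py``, a short client-side check can exercise both the text and image paths. A sketch, where the image URL is only a placeholder:
```python
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:6666/v1", api_key="sk-anything")

# OpenAI Vision-style message; BreezeAPI.py downloads the image URL
# and converts it into Breeze2's {"type": "image", "image_path": ...} format.
response = client.chat.completions.create(
    model="llama-breeze2",
    messages=[
        {"role": "user", "content": [
            {"type": "text", "text": "請描述這張圖片的內容。"},
            {"type": "image_url", "image_url": {"url": "https://example.com/sample.jpg"}},
        ]},
    ],
)
print(response.choices[0].message.content)
```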
## Wrapping up
On the Kuwa GenAI OS admin page, set the group's model access permissions to allow the Pipe.
A Tool named Llama3.2 Breeze2 then shows up. The drawback is that it cannot hold a continuous conversation; every exchange is a single-turn Q&A.



# References
* MediaTek-Research/Llama-Breeze2-8B-Instruct https://huggingface.co/MediaTek-Research/Llama-Breeze2-8B-Instruct
* Demo: Breeze 2 8B https://www.kaggle.com/code/ycckaggle/demo-breeze-2-8b
* PyTorch installation guide https://hackmd.io/@winniemyiwen/PyTorch_installation