# 用 Qwen3 1.7B 打造你的 macOS 桌面自動化 Agent 完整指南

想像一下,你對著電腦說「幫我整理下載資料夾,把所有 PDF 移到『文件』資料夾」,然後它就自動完成了。這不是科幻電影,而是你今天就能在自己的 Mac 上實現的事情。
本文將帶你從零開始,使用 Qwen3 1.7B 這個完全開源的小型語言模型,搭建一個能夠理解自然語言並執行實際操作的桌面自動化 Agent。
> [!tip] 為什麼選擇 Qwen3 1.7B?
> - **完全本地運行**:不需要 API 金鑰;模型下載完成後,也不需要網路連線
> - **隱私保護**:你的指令和資料永遠不會離開你的電腦
> - **原生 Function Calling**:專門為工具調用優化,準確率高
> - **資源友好**:模型檔約 1.4GB,執行時約需 2GB 記憶體,M1/M2 Mac 流暢運行
> - **Apache 2.0 授權**:完全開源,商用免費
---
## 架構概覽

我們要搭建的 Agent 遵循標準的 ReAct(Reasoning + Acting)架構:
```mermaid
flowchart LR
A[用戶輸入] --> B[Qwen3 1.7B]
B --> C{Function Calling}
C --> D[檔案操作]
C --> E[系統指令]
C --> F[應用控制]
C --> G[網頁操作]
D --> H[執行結果]
E --> H
F --> H
G --> H
H --> B
B --> I[回覆用戶]
```
---
## 環境準備
### 第一步:安裝 Ollama
Ollama 是在 Mac 上運行本地 LLM 的最佳選擇,安裝過程非常簡單:
```bash
# 方法一:使用 Homebrew(推薦)
brew install ollama
# 方法二:直接下載
# 前往 https://ollama.com/download 下載 macOS 版本
```
安裝完成後,啟動 Ollama 服務:
```bash
# 啟動服務(會佔用目前的終端機視窗;若要在背景運行,可改用 `brew services start ollama`)
ollama serve
```
### 第二步:下載 Qwen3 1.7B 模型
```bash
# 下載模型(約 1.4GB)
ollama pull qwen3:1.7b
# 驗證安裝
ollama list
```
> [!note] 模型變體選擇
> Ollama 提供多種 Qwen3 版本:
> - `qwen3:0.6b` - 最輕量,適合資源受限環境
> - `qwen3:1.7b` - 平衡之選,Function Calling 表現優秀
> - `qwen3:4b` - 更強能力,需要更多記憶體
> - `qwen3:8b` - 最強能力,建議 16GB+ RAM
### 第三步:設置 Python 環境
```bash
# 建立虛擬環境
python3 -m venv agent-env
source agent-env/bin/activate
# 安裝必要套件
pip install ollama pyobjc pyautogui
# 可選:安裝 Qwen-Agent 框架(功能更強大)
pip install -U "qwen-agent[gui,rag,code_interpreter,mcp]"
```
---
## 基礎實作:最小可行 Agent
讓我們從最簡單的版本開始,理解核心概念:
### 定義工具函數
```python
# agent_tools.py
import os
import subprocess
from datetime import datetime
def get_current_time() -> str:
    """Return the current local date and time as ``YYYY-MM-DD HH:MM:SS``."""
    now = datetime.now()
    # Formatting a datetime through an f-string spec delegates to strftime.
    return f"{now:%Y-%m-%d %H:%M:%S}"
def list_files(directory: str) -> str:
    """List the entries of a directory, one name per line.

    Args:
        directory: Directory path; a leading ``~`` is expanded to the home
            directory.

    Returns:
        Up to 20 entry names in sorted order joined by newlines. When the
        directory holds more entries, a final line states how many were
        left out so the caller knows the listing is incomplete. On failure
        a string starting with ``錯誤: `` is returned instead.
    """
    max_entries = 20  # keep the tool output small enough for the model
    try:
        # Sorting makes the output deterministic; os.listdir order is arbitrary.
        entries = sorted(os.listdir(os.path.expanduser(directory)))
    except (OSError, TypeError, ValueError) as e:
        return f"錯誤: {str(e)}"
    shown = entries[:max_entries]
    hidden = len(entries) - len(shown)
    if hidden > 0:
        shown.append(f"...(還有 {hidden} 個項目未顯示)")
    return "\n".join(shown)
def move_file(source: str, destination: str) -> str:
    """Move a file or directory to a new location.

    Uses ``shutil.move`` rather than ``os.rename`` so the move also works
    across volumes and when ``destination`` is an existing directory (the
    source is then placed inside it).

    Args:
        source: Path of the item to move; ``~`` is expanded.
        destination: Target path or target directory; ``~`` is expanded.

    Returns:
        A success message, or a string starting with ``錯誤: `` on failure.
    """
    import shutil  # local import: this module's header only imports os/subprocess

    try:
        src = os.path.expanduser(source)
        dst = os.path.expanduser(destination)
        shutil.move(src, dst)
    except (OSError, TypeError, ValueError) as e:  # shutil.Error is an OSError
        return f"錯誤: {str(e)}"
    return f"成功將 {source} 移動到 {destination}"
def run_shell_command(command: str) -> str:
    """Run a shell command and return its output.

    A small denylist rejects obviously destructive commands. The check is
    done on a whitespace-collapsed, lower-cased copy of the command so that
    variants such as ``rm   -rf`` or ``SUDO`` are caught as well. This is a
    best-effort guard only, not a sandbox.

    Args:
        command: Shell command line to execute.

    Returns:
        stdout if non-empty, otherwise stderr, otherwise a completion
        message; or a string starting with ``錯誤: `` when the command is
        rejected, times out (30 s) or cannot be started.
    """
    # Safety check: refuse commands containing a known-dangerous fragment.
    dangerous = ['rm -rf', 'sudo', 'mkfs', 'dd if=']
    normalized = " ".join(command.split()).lower()
    if any(d in normalized for d in dangerous):
        return "錯誤: 不允許執行危險指令"
    try:
        result = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            text=True,
            timeout=30
        )
        return result.stdout or result.stderr or "指令執行完成"
    except subprocess.TimeoutExpired:
        return "錯誤: 指令執行超時"
    except Exception as e:
        return f"錯誤: {str(e)}"
def open_application(app_name: str) -> str:
    """Launch a macOS application by name using ``open -a``.

    Returns a confirmation message, or a string starting with ``錯誤: ``
    when the launch command fails.
    """
    launch_cmd = ['open', '-a', app_name]
    try:
        subprocess.run(launch_cmd, check=True)
    except Exception as err:
        return f"錯誤: {str(err)}"
    return f"已開啟 {app_name}"
# Tool definitions handed to the LLM: one function-calling schema per tool.
# Each "name" must match a key in FUNCTION_MAP so the call can be dispatched.
TOOLS = [
# get_current_time: takes no parameters.
{
"type": "function",
"function": {
"name": "get_current_time",
"description": "取得目前的日期和時間",
"parameters": {"type": "object", "properties": {}, "required": []}
}
},
# list_files: requires the directory to list.
{
"type": "function",
"function": {
"name": "list_files",
"description": "列出指定目錄中的檔案和資料夾",
"parameters": {
"type": "object",
"properties": {
"directory": {
"type": "string",
"description": "目錄路徑,例如 ~/Downloads 或 /Users/name/Documents"
}
},
"required": ["directory"]
}
}
},
# move_file: requires both the source and the destination path.
{
"type": "function",
"function": {
"name": "move_file",
"description": "將檔案從一個位置移動到另一個位置",
"parameters": {
"type": "object",
"properties": {
"source": {"type": "string", "description": "來源檔案路徑"},
"destination": {"type": "string", "description": "目標路徑"}
},
"required": ["source", "destination"]
}
}
},
# run_shell_command: requires the command line to run.
{
"type": "function",
"function": {
"name": "run_shell_command",
"description": "執行 shell 指令,用於系統操作",
"parameters": {
"type": "object",
"properties": {
"command": {"type": "string", "description": "要執行的 shell 指令"}
},
"required": ["command"]
}
}
},
# open_application: requires the application name.
{
"type": "function",
"function": {
"name": "open_application",
"description": "開啟 macOS 應用程式",
"parameters": {
"type": "object",
"properties": {
"app_name": {"type": "string", "description": "應用程式名稱,如 Safari、Finder、Notes"}
},
"required": ["app_name"]
}
}
}
]
# Dispatch table: tool name (as declared in TOOLS) -> Python callable.
FUNCTION_MAP = {
"get_current_time": get_current_time,
"list_files": list_files,
"move_file": move_file,
"run_shell_command": run_shell_command,
"open_application": open_application
}
```
### 建立 Agent 核心
```python
# agent_core.py
import json
from ollama import chat
from agent_tools import TOOLS, FUNCTION_MAP
class MacOSAgent:
    """Chat-style macOS automation agent driven by a local Ollama model.

    The agent keeps a running conversation history, offers the tools listed
    in ``TOOLS`` to the model, executes the tool calls the model makes via
    ``FUNCTION_MAP`` and feeds the results back until the model answers in
    plain text.
    """

    def __init__(self, model: str = "qwen3:1.7b", max_tool_rounds: int = 5):
        """Create an agent.

        Args:
            model: Ollama model tag used for every request.
            max_tool_rounds: Maximum number of model round-trips per user
                message; guards against an endless tool-calling loop.
        """
        self.model = model
        self.max_tool_rounds = max_tool_rounds
        self.conversation_history = []
        self.system_prompt = """你是一個 macOS 桌面自動化助手。
你可以幫助用戶執行檔案管理、開啟應用程式、執行系統指令等任務。
請使用繁體中文回覆。
重要規則:
1. 在執行任何操作前,先確認用戶的意圖
2. 對於可能影響系統的操作,要謹慎處理
3. 執行完成後,清楚回報結果
4. 如果遇到錯誤,提供解決建議"""

    @staticmethod
    def _parse_arguments(raw_arguments) -> dict:
        """Normalise tool-call arguments to a plain ``dict``.

        The arguments may arrive either as an already-decoded mapping or as
        a JSON string; anything that cannot be interpreted yields ``{}``.
        """
        if raw_arguments is None:
            return {}
        if isinstance(raw_arguments, (str, bytes, bytearray)):
            try:
                parsed = json.loads(raw_arguments)
            except ValueError:  # includes json.JSONDecodeError
                return {}
            return parsed if isinstance(parsed, dict) else {}
        try:
            return dict(raw_arguments)
        except (TypeError, ValueError):
            return {}

    def process_tool_calls(self, tool_calls):
        """Execute each requested tool and return the resulting messages.

        Args:
            tool_calls: Iterable of objects exposing ``function.name`` and
                ``function.arguments``.

        Returns:
            A list of ``{"role": "tool", "content": ...}`` messages, one per
            tool call, in the same order.
        """
        results = []
        for tool_call in tool_calls:
            func_name = tool_call.function.name
            args = self._parse_arguments(tool_call.function.arguments)
            handler = FUNCTION_MAP.get(func_name)
            if handler is None:
                results.append({
                    "role": "tool",
                    "content": f"未知工具: {func_name}"
                })
                continue
            print(f" 🔧 執行工具: {func_name}")
            print(f" 參數: {args}")
            try:
                result = str(handler(**args))
            except Exception as e:
                # Tool boundary: report bad arguments or tool failures back
                # to the model instead of crashing the whole conversation.
                result = f"錯誤: {str(e)}"
            preview = f"{result[:100]}..." if len(result) > 100 else result
            print(f" 結果: {preview}")
            results.append({
                "role": "tool",
                "content": result
            })
        return results

    def chat(self, user_message: str) -> str:
        """Send one user message and return the agent's final reply.

        The model is queried repeatedly (at most ``max_tool_rounds`` times):
        as long as it answers with tool calls they are executed and their
        results appended to the history; the first plain-text answer is
        returned.
        """
        self.conversation_history.append({
            "role": "user",
            "content": user_message
        })
        for _ in range(self.max_tool_rounds):
            messages = [
                {"role": "system", "content": self.system_prompt}
            ] + self.conversation_history
            response = chat(
                model=self.model,
                messages=messages,
                tools=TOOLS
            )
            assistant_message = response.message
            if not assistant_message.tool_calls:
                # No tool call: this is the final answer.
                final_content = assistant_message.content or ""
                self.conversation_history.append({
                    "role": "assistant",
                    "content": final_content
                })
                return final_content
            # Record the assistant turn that requested the tools...
            self.conversation_history.append({
                "role": "assistant",
                "content": assistant_message.content or "",
                "tool_calls": [
                    {
                        "function": {
                            "name": tc.function.name,
                            "arguments": tc.function.arguments
                        }
                    } for tc in assistant_message.tool_calls
                ]
            })
            # ...then run them and let the model see the results next round.
            self.conversation_history.extend(
                self.process_tool_calls(assistant_message.tool_calls)
            )
        give_up = "任務執行超過最大迭代次數"
        self.conversation_history.append({
            "role": "assistant",
            "content": give_up
        })
        return give_up

    def reset(self):
        """Clear the conversation history."""
        self.conversation_history = []
def main():
    """Run the interactive command-line loop around ``MacOSAgent``.

    Reads one line at a time; ``quit`` exits, ``reset`` clears the
    conversation, anything else is sent to the agent. Ctrl-C and
    end-of-input (Ctrl-D) both end the session cleanly.
    """
    print("=" * 50)
    print("🤖 macOS 桌面自動化 Agent")
    print(" 模型: Qwen3 1.7B (本地運行)")
    print("=" * 50)
    print("輸入 'quit' 退出,'reset' 重置對話\n")
    agent = MacOSAgent()
    while True:
        try:
            user_input = input("👤 你: ").strip()
            if not user_input:
                continue
            if user_input.lower() == 'quit':
                print("再見!")
                break
            if user_input.lower() == 'reset':
                agent.reset()
                print("對話已重置。\n")
                continue
            print("\n🤖 Agent: ", end="")
            response = agent.chat(user_input)
            print(response)
            print()
        except (KeyboardInterrupt, EOFError):
            # input() raises EOFError on Ctrl-D or when stdin is closed.
            print("\n再見!")
            break


if __name__ == "__main__":
    main()
```
---
## 進階功能:macOS 專屬自動化
### AppleScript 整合
macOS 的殺手鐧是 AppleScript,它能控制幾乎所有原生應用:
```python
# applescript_tools.py
import subprocess
def run_applescript(script: str) -> str:
    """Run an AppleScript snippet through ``osascript``.

    Returns the stripped stdout if non-empty, otherwise the stripped stderr,
    otherwise a completion message. Any exception (including the 30 s
    timeout) is reported as a string starting with ``錯誤: ``.
    """
    try:
        completed = subprocess.run(
            ['osascript', '-e', script],
            capture_output=True,
            text=True,
            timeout=30,
        )
        for stream in (completed.stdout, completed.stderr):
            cleaned = stream.strip()
            if cleaned:
                return cleaned
        return "執行完成"
    except Exception as err:
        return f"錯誤: {str(err)}"
def get_frontmost_app() -> str:
    """Return the name of the frontmost application process."""
    # Joined line by line; the leading and trailing empty items reproduce
    # the newlines that open and close the script text.
    script_lines = (
        '',
        'tell application "System Events"',
        'return name of first application process whose frontmost is true',
        'end tell',
        '',
    )
    return run_applescript('\n'.join(script_lines))
def set_volume(level: int) -> str:
    """Set the system output volume.

    Args:
        level: Desired volume from 0 to 100. Numeric strings and floats are
            accepted as well and converted to an integer; out-of-range
            values are clamped.

    Returns:
        A confirmation message, or a string starting with ``錯誤: `` when
        ``level`` is not a number.
    """
    try:
        # Models frequently send "50" or 50.0 instead of an integer.
        level = int(float(level))
    except (TypeError, ValueError, OverflowError):
        return f"錯誤: 無效的音量值 {level!r}"
    level = max(0, min(100, level))
    script = f'set volume output volume {level}'
    run_applescript(script)
    return f"音量已設為 {level}%"
def send_notification(title: str, message: str) -> str:
    """Show a macOS notification.

    Both strings are escaped before being embedded in the AppleScript
    source, so quotes or backslashes in them can neither break the script
    nor inject additional AppleScript.

    Args:
        title: Notification title.
        message: Notification body text.

    Returns:
        A confirmation message.
    """
    # Escape backslashes first, then double quotes (same rule as set_clipboard).
    safe_title = title.replace('\\', '\\\\').replace('"', '\\"')
    safe_message = message.replace('\\', '\\\\').replace('"', '\\"')
    script = f'\ndisplay notification "{safe_message}" with title "{safe_title}"\n'
    run_applescript(script)
    return "通知已發送"
def get_clipboard() -> str:
    """Return the clipboard contents as reported by AppleScript."""
    return run_applescript('return the clipboard')
def set_clipboard(content: str) -> str:
    """Place ``content`` on the clipboard and return a confirmation message."""
    # Escape for an AppleScript string literal: backslashes first, then quotes.
    escaped = content.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    run_applescript('set the clipboard to "' + escaped + '"')
    return "已複製到剪貼簿"
def create_reminder(title: str, due_date: str = None) -> str:
    """Create a reminder in the Reminders app.

    The title and due date are escaped before being embedded in the
    AppleScript source, so quotes or backslashes in them cannot break or
    inject into the script.

    Args:
        title: Reminder title.
        due_date: Optional due date in a form AppleScript's ``date``
            coercion accepts; omitted or empty means no due date.

    Returns:
        A confirmation message containing the title.
    """
    safe_title = title.replace('\\', '\\\\').replace('"', '\\"')
    properties = f'name:"{safe_title}"'
    if due_date:
        safe_due = due_date.replace('\\', '\\\\').replace('"', '\\"')
        properties += f', due date:date "{safe_due}"'
    # The tripled braces emit the literal { } of an AppleScript record.
    script = (
        '\ntell application "Reminders"\n'
        f'make new reminder with properties {{{properties}}}\n'
        'end tell\n'
    )
    run_applescript(script)
    return f"提醒 '{title}' 已建立"
def create_calendar_event(title: str, start_time: str, end_time: str,
                          calendar_name: str = "行事曆") -> str:
    """Create an event in the Calendar app.

    All interpolated values are escaped before being embedded in the
    AppleScript source, so quotes or backslashes in them cannot break or
    inject into the script.

    Args:
        title: Event summary.
        start_time: Start date/time in a form AppleScript's ``date``
            coercion accepts.
        end_time: End date/time in the same form.
        calendar_name: Name of the target calendar. Defaults to the
            previously hard-coded ``行事曆``; pass another name when the
            system calendar is named differently.

    Returns:
        A confirmation message containing the title.
    """
    safe_title = title.replace('\\', '\\\\').replace('"', '\\"')
    safe_start = start_time.replace('\\', '\\\\').replace('"', '\\"')
    safe_end = end_time.replace('\\', '\\\\').replace('"', '\\"')
    safe_calendar = calendar_name.replace('\\', '\\\\').replace('"', '\\"')
    script = (
        '\ntell application "Calendar"\n'
        f'tell calendar "{safe_calendar}"\n'
        f'make new event with properties {{summary:"{safe_title}", '
        f'start date:date "{safe_start}", end date:date "{safe_end}"}}\n'
        'end tell\n'
        'end tell\n'
    )
    run_applescript(script)
    return f"事件 '{title}' 已建立"
def get_safari_url() -> str:
    """Return the URL of the current tab in Safari's front window."""
    # Joined line by line; the leading and trailing empty items reproduce
    # the newlines that open and close the script text.
    script_lines = (
        '',
        'tell application "Safari"',
        'return URL of current tab of front window',
        'end tell',
        '',
    )
    return run_applescript('\n'.join(script_lines))
def open_url_in_safari(url: str) -> str:
    """Open a URL in Safari and bring Safari to the front.

    The URL is escaped before being embedded in the AppleScript source, so
    a quote or backslash in it cannot break or inject into the script.

    Args:
        url: Address to open.

    Returns:
        A confirmation message containing the URL.
    """
    safe_url = url.replace('\\', '\\\\').replace('"', '\\"')
    script = (
        '\ntell application "Safari"\n'
        'activate\n'
        f'open location "{safe_url}"\n'
        'end tell\n'
    )
    run_applescript(script)
    return f"已在 Safari 開啟 {url}"
```
### PyAutoGUI 視覺自動化
對於沒有 AppleScript 支援的應用程式,可以使用 PyAutoGUI:
```python
# gui_automation.py
import pyautogui
import time
# Safety settings
pyautogui.FAILSAFE = True # Moving the pointer to a screen corner aborts the automation
pyautogui.PAUSE = 0.5 # Wait 0.5 seconds between actions
def take_screenshot(filename: str = None) -> str:
    """Capture the screen and save it to a file.

    When ``filename`` is ``None`` a name of the form
    ``screenshot_<unix-seconds>.png`` is generated. Returns a message
    containing the file name used.
    """
    target = f"screenshot_{int(time.time())}.png" if filename is None else filename
    pyautogui.screenshot().save(target)
    return f"截圖已保存: {target}"
def click_at(x: int, y: int) -> str:
    """Click at screen coordinates ``(x, y)`` and return a confirmation."""
    pyautogui.click(x, y)
    coords = f"({x}, {y})"
    return f"已點擊座標 {coords}"
def type_text(text: str, interval: float = 0.05) -> str:
    """Type ``text`` with ``interval`` seconds between key presses.

    NOTE(review): ``pyautogui.typewrite`` presumably handles only keys on
    the keyboard, so non-ASCII text may not be typed; confirm against the
    PyAutoGUI documentation.
    """
    pyautogui.typewrite(text, interval=interval)
    confirmation = f"已輸入: {text}"
    return confirmation
def hotkey(*keys) -> str:
    """Press the given keys together as one shortcut and describe it."""
    pyautogui.hotkey(*keys)
    combo = '+'.join(keys)
    return f"已按下組合鍵: {combo}"
def scroll_screen(clicks: int, x: int = None, y: int = None) -> str:
    """Scroll by ``clicks`` steps, optionally at position ``(x, y)``.

    A positive ``clicks`` is reported as scrolling up, anything else as
    scrolling down.
    """
    pyautogui.scroll(clicks, x=x, y=y)
    if clicks > 0:
        direction = "上"
    else:
        direction = "下"
    steps = abs(clicks)
    return f"已向{direction}滾動 {steps} 格"
def get_mouse_position() -> str:
    """Return the current pointer position formatted as text."""
    pos_x, pos_y = pyautogui.position()
    coords = f"({pos_x}, {pos_y})"
    return f"滑鼠位置: {coords}"
def locate_on_screen(image_path: str) -> str:
    """Search the screen for an image and report where its centre is.

    Returns the centre coordinates when found, a not-found message when the
    search yields nothing, or a string starting with ``錯誤: `` when the
    search raises.
    """
    try:
        match = pyautogui.locateOnScreen(image_path, confidence=0.9)
        if not match:
            return "未找到圖片"
        middle = pyautogui.center(match)
        return f"找到圖片,中心位置: ({middle.x}, {middle.y})"
    except Exception as err:
        return f"錯誤: {str(err)}"
```
---
## 完整整合:Production-Ready Agent
將所有工具整合為一個完整的 Agent:
```python
# macos_agent.py
import json
from ollama import chat
from agent_tools import TOOLS as BASE_TOOLS, FUNCTION_MAP as BASE_FUNCTIONS
from applescript_tools import (
run_applescript, get_frontmost_app, set_volume,
send_notification, get_clipboard, set_clipboard,
create_reminder, get_safari_url, open_url_in_safari
)
from gui_automation import (
take_screenshot, click_at, type_text, hotkey,
scroll_screen, get_mouse_position
)
# Extended tool definitions: the base schemas plus the AppleScript and GUI tools.
# Each "name" must match a key in EXTENDED_FUNCTIONS so the call can be dispatched.
EXTENDED_TOOLS = BASE_TOOLS + [
# set_volume: requires an integer level.
{
"type": "function",
"function": {
"name": "set_volume",
"description": "設定系統音量",
"parameters": {
"type": "object",
"properties": {
"level": {"type": "integer", "description": "音量等級 (0-100)"}
},
"required": ["level"]
}
}
},
# send_notification: requires both title and message.
{
"type": "function",
"function": {
"name": "send_notification",
"description": "發送 macOS 系統通知",
"parameters": {
"type": "object",
"properties": {
"title": {"type": "string", "description": "通知標題"},
"message": {"type": "string", "description": "通知內容"}
},
"required": ["title", "message"]
}
}
},
# create_reminder: title is required, due_date is optional.
{
"type": "function",
"function": {
"name": "create_reminder",
"description": "在提醒事項 App 建立新提醒",
"parameters": {
"type": "object",
"properties": {
"title": {"type": "string", "description": "提醒標題"},
"due_date": {"type": "string", "description": "到期日期,格式如 'January 15, 2025 10:00 AM'"}
},
"required": ["title"]
}
}
},
# take_screenshot: filename is optional.
{
"type": "function",
"function": {
"name": "take_screenshot",
"description": "擷取螢幕截圖",
"parameters": {
"type": "object",
"properties": {
"filename": {"type": "string", "description": "儲存檔名(可選)"}
},
"required": []
}
}
},
# get_clipboard: takes no parameters.
{
"type": "function",
"function": {
"name": "get_clipboard",
"description": "取得剪貼簿內容",
"parameters": {"type": "object", "properties": {}, "required": []}
}
},
# set_clipboard: requires the content to copy.
{
"type": "function",
"function": {
"name": "set_clipboard",
"description": "設定剪貼簿內容",
"parameters": {
"type": "object",
"properties": {
"content": {"type": "string", "description": "要複製的內容"}
},
"required": ["content"]
}
}
},
# open_url_in_safari: requires the URL to open.
{
"type": "function",
"function": {
"name": "open_url_in_safari",
"description": "在 Safari 瀏覽器開啟網址",
"parameters": {
"type": "object",
"properties": {
"url": {"type": "string", "description": "要開啟的網址"}
},
"required": ["url"]
}
}
}
]
# Extended dispatch table: every base tool plus the tools declared in EXTENDED_TOOLS.
EXTENDED_FUNCTIONS = {
**BASE_FUNCTIONS,
"set_volume": set_volume,
"send_notification": send_notification,
"create_reminder": create_reminder,
"take_screenshot": take_screenshot,
"get_clipboard": get_clipboard,
"set_clipboard": set_clipboard,
"open_url_in_safari": open_url_in_safari,
}
class ProductionAgent:
    """Task-oriented macOS automation agent with a bounded tool loop.

    Each call to :meth:`run` starts a fresh history, lets the model call the
    tools in ``EXTENDED_TOOLS`` (dispatched through ``EXTENDED_FUNCTIONS``)
    and stops at the first plain-text answer or after ``max_iterations``
    model round-trips.
    """

    def __init__(self, model: str = "qwen3:1.7b"):
        """Create an agent that talks to the given Ollama model tag."""
        self.model = model
        self.history = []
        self.max_iterations = 5  # guards against an endless tool-calling loop

    @staticmethod
    def _parse_arguments(raw_arguments) -> dict:
        """Normalise tool-call arguments to a plain ``dict``.

        The arguments may arrive either as an already-decoded mapping or as
        a JSON string; anything that cannot be interpreted yields ``{}``.
        """
        if raw_arguments is None:
            return {}
        if isinstance(raw_arguments, (str, bytes, bytearray)):
            try:
                parsed = json.loads(raw_arguments)
            except ValueError:  # includes json.JSONDecodeError
                return {}
            return parsed if isinstance(parsed, dict) else {}
        try:
            return dict(raw_arguments)
        except (TypeError, ValueError):
            return {}

    def run(self, task: str) -> str:
        """Execute one task and return the model's final answer.

        Args:
            task: Natural-language description of what to do.

        Returns:
            The model's final text, or a fixed message when the iteration
            limit is reached while the model is still requesting tools.
        """
        self.history = [{"role": "user", "content": task}]
        system = """你是一個專業的 macOS 桌面自動化助手。
你的能力包括:
- 檔案和資料夾管理
- 系統設定調整
- 應用程式控制
- 提醒事項和行事曆管理
- 螢幕截圖
- 剪貼簿操作
- 網頁瀏覽
執行任務時:
1. 分析用戶需求,選擇合適的工具
2. 一步一步執行,確認每步結果
3. 遇到問題時提供解決方案
4. 完成後總結執行結果
請使用繁體中文回覆。"""
        for i in range(self.max_iterations):
            response = chat(
                model=self.model,
                messages=[{"role": "system", "content": system}] + self.history,
                tools=EXTENDED_TOOLS
            )
            msg = response.message
            if not msg.tool_calls:
                # No tool call: the task is finished.
                self.history.append({"role": "assistant", "content": msg.content})
                return msg.content
            # Record the assistant turn that requested the tools.
            self.history.append({
                "role": "assistant",
                "content": msg.content or "",
                "tool_calls": [
                    {"function": {"name": tc.function.name, "arguments": tc.function.arguments}}
                    for tc in msg.tool_calls
                ]
            })
            # Execute each tool and feed its result back to the model.
            for tc in msg.tool_calls:
                func_name = tc.function.name
                args = self._parse_arguments(tc.function.arguments)
                if func_name in EXTENDED_FUNCTIONS:
                    try:
                        result = EXTENDED_FUNCTIONS[func_name](**args)
                    except Exception as e:
                        # Tool boundary: report bad arguments or failures to
                        # the model instead of aborting the whole task.
                        result = f"錯誤: {str(e)}"
                else:
                    result = f"未知工具: {func_name}"
                self.history.append({"role": "tool", "content": str(result)})
        return "任務執行超過最大迭代次數"
if __name__ == "__main__":
    # Smoke-test the agent with a few representative requests.
    agent = ProductionAgent()
    tasks = [
        "現在幾點了?",
        "幫我把音量調到 50%",
        "列出我下載資料夾裡的檔案",
        "發送一個通知提醒我開會"
    ]
    separator = "-" * 40
    for task in tasks:
        print(f"\n📋 任務: {task}")
        reply = agent.run(task)
        print(f"🤖 回應: {reply}")
        print(separator)
```
---
## 實用範例
### 範例一:整理下載資料夾
```
👤: 幫我把下載資料夾裡的 PDF 檔案移到文件資料夾
🤖: 我來幫你整理。首先讓我看看下載資料夾裡有什麼...
🔧 執行工具: list_files
參數: {"directory": "~/Downloads"}
結果: report.pdf, image.png, video.mp4, document.pdf...
找到 2 個 PDF 檔案。現在移動它們...
🔧 執行工具: move_file
參數: {"source": "~/Downloads/report.pdf", "destination": "~/Documents/report.pdf"}
結果: 成功移動
🔧 執行工具: move_file
參數: {"source": "~/Downloads/document.pdf", "destination": "~/Documents/document.pdf"}
結果: 成功移動
完成!已將 2 個 PDF 檔案移動到文件資料夾。
```
### 範例二:建立提醒
```
👤: 提醒我明天下午三點開會
🤖: 🔧 執行工具: create_reminder
參數: {"title": "開會", "due_date": "January 7, 2025 3:00 PM"}
結果: 提醒 '開會' 已建立
已在提醒事項中建立「開會」提醒,時間設定為明天下午 3:00。
```
### 範例三:快速工作流
```
👤: 開啟 Safari 並幫我截圖
🤖: 🔧 執行工具: open_application
參數: {"app_name": "Safari"}
結果: 已開啟 Safari
🔧 執行工具: take_screenshot
參數: {"filename": "safari_screenshot.png"}
結果: 截圖已保存: safari_screenshot.png
Safari 已開啟,螢幕截圖已保存為 safari_screenshot.png。
```
---
## 安全考量
> [!warning] 重要安全提醒
> 讓 AI 控制你的電腦是有風險的。請務必實作以下安全措施:
### 1. 指令白名單
```python
SAFE_COMMANDS = {
    'ls', 'pwd', 'echo', 'cat', 'head', 'tail',
    'mkdir', 'cp', 'mv', 'find', 'grep'
}
# Characters that let a shell chain, redirect, group or substitute commands.
_SHELL_METACHARACTERS = frozenset(';&|<>`$(){}\n\r')


def is_safe_command(command: str) -> bool:
    """Return True only for a single allow-listed command.

    Checking just the first word is not enough: ``ls; rm -rf ~`` starts
    with an allowed word but runs a second command. The command is
    therefore rejected outright when it contains any shell metacharacter,
    and otherwise accepted only if its first token is in ``SAFE_COMMANDS``.
    Empty input and unbalanced quotes are rejected.

    Note that allow-listed programs can still be destructive through their
    own options (e.g. ``find -delete``); combine this with confirmation and
    path sandboxing.
    """
    import shlex  # local import: this snippet has no import header of its own

    if any(ch in _SHELL_METACHARACTERS for ch in command):
        return False
    try:
        tokens = shlex.split(command)
    except ValueError:  # unbalanced quotes
        return False
    return bool(tokens) and tokens[0] in SAFE_COMMANDS
```
### 2. 確認機制
```python
def execute_with_confirmation(action: str, func, *args, **kwargs):
    """Ask the user to confirm, then call ``func``.

    Args:
        action: Human-readable description shown in the prompt.
        func: Callable to run once the user agrees.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``, so a tool can be
            invoked with the argument dict produced by the model.

    Returns:
        Whatever ``func`` returns, or a cancellation message when the user
        declines or input ends.
    """
    print(f"⚠️ 即將執行: {action}")
    try:
        confirm = input("確認執行?(y/n): ")
    except EOFError:
        # No answer available (stdin closed): treat as "no".
        return "操作已取消"
    # Tolerate surrounding whitespace and a spelled-out "yes".
    if confirm.strip().lower() in ('y', 'yes'):
        return func(*args, **kwargs)
    return "操作已取消"
```
### 3. 沙箱目錄
```python
import os

SANDBOX_DIR = os.path.expanduser("~/AgentSandbox")


def validate_path(path: str) -> bool:
    """Return True when ``path`` is the sandbox directory or lies inside it.

    Both paths are resolved with ``realpath`` so ``..`` segments and
    symlinks cannot escape the sandbox, and they are compared by path
    component rather than by string prefix, so a sibling directory such as
    ``~/AgentSandboxEvil`` is rejected.
    """
    sandbox_root = os.path.realpath(SANDBOX_DIR)
    candidate = os.path.realpath(os.path.expanduser(path))
    try:
        return os.path.commonpath([sandbox_root, candidate]) == sandbox_root
    except ValueError:
        # Raised when the two paths cannot be compared (e.g. different drives).
        return False
```
---
## 延伸閱讀
- [[深度研究_FunctionGemma使用場景與類似模型比較_20250106]] - Function Calling 模型比較
- [Ollama 上的 Qwen3 模型頁面](https://ollama.com/library/qwen3)
- [Qwen3 技術報告](https://qwenlm.github.io/blog/qwen3/)
- [PyAutoGUI 文檔](https://pyautogui.readthedocs.io/)
- [AppleScript 入門指南](https://developer.apple.com/library/archive/documentation/AppleScript/Conceptual/AppleScriptLangGuide/)
---
## 結語
使用 Qwen3 1.7B 搭建本地自動化 Agent 是一個完美的起點。它足夠小巧可以在任何現代 Mac 上流暢運行,同時具備出色的 Function Calling 能力。
從這個基礎出發,你可以:
- 添加更多工具(郵件、訊息、音樂控制等)
- 整合 MCP 協議連接更多服務
- 加入語音輸入實現真正的語音助手
- 建立自定義工作流自動化日常任務

本地 AI 的時代已經來臨,而你的 Mac 正是最佳的實驗場所。

---
*本文最後更新:2025-01-06*