# Automated Camera Tracking Workflow (COLMAP + GLOMAP)
本文件說明如何使用 Python 進行影片的自動化相機追蹤與 NeRF 格式轉換。
## 1. 環境準備 (Prerequisites)
在執行腳本之前,請確保以下軟體已安裝並設定於系統環境變數 (System PATH) 中,以便腳本可以直接呼叫命令。
### 必備軟體
1. **FFmpeg**
* 用途:將影片拆解為圖片序列。
* 下載:FFmpeg 官網
* 設定:將 `bin` 資料夾路徑加入 PATH (例如 `C:\ffmpeg\bin`)。
* 驗證:在終端機輸入 `ffmpeg -version`。
2. **COLMAP** (建議使用 CUDA 版本)
* 用途:特徵提取 (Feature Extraction) 與 序列匹配 (Sequential Matching)。
* 下載:COLMAP GitHub
* 設定:將包含 `colmap.exe` 的資料夾路徑加入 PATH。
* 驗證:在終端機輸入 `colmap --help`。
3. **GLOMAP**
* 用途:全域稀疏重建 (Global Sparse Reconstruction),比傳統 COLMAP Mapper 更快且對影片序列更穩健。
* 下載:GLOMAP GitHub
* 設定:將包含 `glomap.exe` 的資料夾路徑加入 PATH。
* 驗證:在終端機輸入 `glomap --help`。
4. **Python Libraries**
* 請確保安裝必要的 Python 套件:
```bash
pip install numpy opencv-python
```
---
## 2. 自動化追蹤 (Step 1: AutoTracker)
此腳本會批次處理影片資料夾,自動執行:拆幀 -> 特徵提取 -> 序列匹配 -> GLOMAP 重建 -> 轉換模型為 TXT 格式。
```python
import os
import sys
import subprocess
import glob
import argparse
# System Binaries (Ensure these are in your PATH)
FFMPEG = "ffmpeg"
COLMAP = "colmap"
GLOMAP = "glomap"
def run_command(cmd, error_msg, quiet=False):
    """Execute an external command and report success.

    Args:
        cmd: Argument vector handed to ``subprocess.run``.
        error_msg: Message printed when the command fails or cannot be found.
        quiet: When True, discard the child's stdout and stderr.

    Returns:
        True if the command exited with status 0, False otherwise.
    """
    # stdout/stderr default to None (inherit); DEVNULL silences the child.
    sink = subprocess.DEVNULL if quiet else None
    try:
        subprocess.run(cmd, check=True, stdout=sink, stderr=sink)
    except subprocess.CalledProcessError:
        print(error_msg)
        return False
    except FileNotFoundError:
        # Binary is not on PATH — tell the user which one.
        print(f" [ERROR] Binary not found: {cmd[0]}")
        print(error_msg)
        return False
    return True
def process_video(video_path: str, scenes_dir: str, idx: int, total: int, overlap: int = 12, scale: float = 1.0) -> None:
    """Run the full tracking pipeline on a single video.

    Pipeline: extract frames with FFmpeg -> COLMAP feature extraction ->
    COLMAP sequential matching -> GLOMAP sparse reconstruction -> export the
    model as TXT (both inside sparse/0 and in sparse/).

    Args:
        video_path: Path to the input video file.
        scenes_dir: Root directory that receives one sub-folder per video.
        idx: 1-based index of this video (progress display only).
        total: Total number of videos (progress display only).
        overlap: Value for COLMAP's SequentialMatching.overlap option.
        scale: Uniform image scale factor applied during frame extraction.
    """
    # Get base name and extension
    base_name = os.path.splitext(os.path.basename(video_path))[0]
    ext = os.path.splitext(video_path)[1]
    print(f"\n[{idx}/{total}] === Processing \"{base_name}{ext}\" ===")
    # Directory layout: <scenes_dir>/<video name>/{images, sparse, database.db}
    scene_path = os.path.join(scenes_dir, base_name)
    img_dir = os.path.join(scene_path, "images")
    sparse_dir = os.path.join(scene_path, "sparse")
    database_path = os.path.join(scene_path, "database.db")
    # Skip if already reconstructed.
    # NOTE(review): this only tests that the scene folder exists, so a
    # previously *failed* run is also skipped — delete the folder to retry.
    if os.path.exists(scene_path):
        print(f" • Skipping \"{base_name}\" – already reconstructed.")
        return
    # Create the working directories for a fresh run
    try:
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(sparse_dir, exist_ok=True)
    except OSError as e:
        print(f" [ERROR] Could not create directories: {e}")
        return
    # 1) Extract every frame (-qscale:v 2 = high JPEG quality)
    print(" [1/4] Extracting frames ...")
    frame_pattern = os.path.join(img_dir, "frame_%06d.jpg")
    cmd_ffmpeg = [
        FFMPEG, "-loglevel", "error", "-stats", "-i", video_path,
        "-qscale:v", "2"
    ]
    if scale != 1.0:
        # Uniform down/up-scaling of both dimensions via an FFmpeg filter
        cmd_ffmpeg.extend(["-vf", f"scale=iw*{scale}:ih*{scale}"])
    cmd_ffmpeg.append(frame_pattern)
    if not run_command(cmd_ffmpeg, f" × FFmpeg failed – skipping \"{base_name}\"."):
        return
    # Defensive check: verify that frames actually landed on disk
    if not glob.glob(os.path.join(img_dir, "*.jpg")):
        print(f" × No frames extracted – skipping \"{base_name}\".")
        return
    # 2) Feature extraction (COLMAP) — one shared camera model, GPU SIFT
    print(" [2/4] COLMAP feature_extractor ...")
    cmd_colmap_fe = [
        COLMAP, "feature_extractor",
        "--database_path", database_path,
        "--image_path", img_dir,
        "--ImageReader.single_camera", "1",
        "--SiftExtraction.use_gpu", "1"
    ]
    if not run_command(cmd_colmap_fe, f" × feature_extractor failed – skipping \"{base_name}\"."):
        return
    # 3) Sequential matching (COLMAP) — matches neighbouring frames, suited to video
    print(" [3/4] COLMAP sequential_matcher ...")
    cmd_colmap_sm = [
        COLMAP, "sequential_matcher",
        "--database_path", database_path,
        "--SequentialMatching.overlap", str(overlap)
    ]
    if not run_command(cmd_colmap_sm, f" × sequential_matcher failed – skipping \"{base_name}\"."):
        return
    # 4) Sparse reconstruction (GLOMAP)
    print(" [4/4] GLOMAP mapper ...")
    cmd_glomap = [
        GLOMAP, "mapper",
        "--database_path", database_path,
        "--image_path", img_dir,
        "--output_path", sparse_dir
    ]
    if not run_command(cmd_glomap, f" × glomap mapper failed – skipping \"{base_name}\"."):
        return
    # Export TXT inside the model folder.
    # Keep TXT next to BIN so Blender can import from sparse\0 directly.
    sparse_0_dir = os.path.join(sparse_dir, "0")
    if os.path.exists(sparse_0_dir):
        cmd_convert_1 = [
            COLMAP, "model_converter",
            "--input_path", sparse_0_dir,
            "--output_path", sparse_0_dir,
            "--output_type", "TXT"
        ]
        # TXT export failures are non-fatal: the BIN model is still usable
        run_command(cmd_convert_1, " [WARN] Failed to export TXT to sparse/0", quiet=True)
        # Also export TXT to the parent sparse\ folder (for Blender auto-detect)
        cmd_convert_2 = [
            COLMAP, "model_converter",
            "--input_path", sparse_0_dir,
            "--output_path", sparse_dir,
            "--output_type", "TXT"
        ]
        run_command(cmd_convert_2, " [WARN] Failed to export TXT to sparse/", quiet=True)
    print(f" ✓ Finished \"{base_name}\" ({idx}/{total})")
def main():
    """CLI entry point: run the GLOMAP pipeline over every file in a folder.

    Usage: script.py videos_dir scenes_dir [--overlap N] [--scale F]

    Exits non-zero when the input folder is missing or arguments are absent.
    """
    parser = argparse.ArgumentParser(description="Batch script for automated photogrammetry tracking workflow.")
    parser.add_argument("videos_dir", help="Directory containing input videos")
    parser.add_argument("scenes_dir", help="Directory to output scenes")
    parser.add_argument("--overlap", type=int, default=12, help="Sequential matching overlap (default: 12)")
    parser.add_argument("--scale", type=float, default=1.0, help="Image scaling factor (default: 1.0)")
    # Show full usage (instead of a terse argparse error) when run bare
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)
    args = parser.parse_args()
    videos_dir = os.path.abspath(args.videos_dir)
    scenes_dir = os.path.abspath(args.scenes_dir)
    # Ensure required folders exist
    if not os.path.isdir(videos_dir):
        print(f"[ERROR] Input folder \"{videos_dir}\" missing.")
        input("Press Enter to exit...")
        sys.exit(1)
    try:
        os.makedirs(scenes_dir, exist_ok=True)
    except OSError as e:
        print(f"[ERROR] Could not create output folder \"{scenes_dir}\": {e}")
        sys.exit(1)
    # Collect regular files only. Sorted so batches run in a deterministic
    # order (os.listdir order is filesystem-dependent).
    # NOTE(review): no extension filter — a non-video file here will simply
    # fail at the FFmpeg step and be skipped by process_video.
    video_files = sorted(
        f for f in os.listdir(videos_dir)
        if os.path.isfile(os.path.join(videos_dir, f))
    )
    total = len(video_files)
    if total == 0:
        print(f"[INFO] No video files found in \"{videos_dir}\".")
        input("Press Enter to exit...")
        sys.exit(0)
    print("==============================================================")
    print(f" Starting GLOMAP pipeline on {total} video(s) ...")
    print("==============================================================")
    for idx, video_file in enumerate(video_files, 1):
        process_video(os.path.join(videos_dir, video_file), scenes_dir, idx, total, overlap=args.overlap, scale=args.scale)
    print("--------------------------------------------------------------")
    print(f" All jobs finished – results are in \"{scenes_dir}\".")
    print("--------------------------------------------------------------")
    input("Press Enter to exit...")
if __name__ == "__main__":
    main()
```
### 參數說明
* `videos_dir` (必要): 包含 `.mp4`, `.mov` 等影片檔的資料夾。
* `scenes_dir` (必要): 輸出結果的資料夾。
* `--overlap`: (選填, 預設 12) 序列匹配的重疊張數。
* 動作快或幀率低的影片可嘗試調高 (例如 20)。
* `--scale`: (選填, 預設 1.0) 圖片縮放比例。
* `1.0`: 原始解析度。
* `0.5`: 長寬各縮小一半 (速度較快)。
---
## 3. 轉換為 NeRF 格式 (Step 2: colmap2nerf)
若需要 `transforms.json` (NeRF/Instant-NGP 格式),可使用 `colmap2nerf.py` 讀取上一步驟產生的 COLMAP TXT 資料。
此腳本源自 Instant-NGP GitHub。
注意:建議先安裝好 Nerfstudio,以確保相關 Python 依賴庫完整。
由於之前已經完成了重建並輸出了 TXT 檔在 `sparse\0` 資料夾中,我們不需要加 `--run_colmap` 參數。
### 關鍵參數
* `--colmap_db`: 指向產生的 `database.db`。
* `--images`: 指向產生的 `images` 資料夾。
* `--text`: 指向產生的 `sparse\0` 資料夾 (裡面包含 cameras.txt, images.txt, points3D.txt)。
* `--keep_colmap_coords`: 保持 COLMAP 原始座標系 (不進行自動置中與旋轉),這對於後續 VFX 流程整合非常重要。
* `--out`: 輸出的 json 檔案路徑。
---
## 4. 鏡頭校正與去畸變 (Step 3: Undistortion)
使用以下腳本將圖片進行去畸變 (Undistortion),並同步校正 `transforms.json` 中的相機參數 (Intrinsics),使其適用於 Houdini 或其他需要 Linear Pinhole Camera 的軟體。
> **注意:** `transforms.json` 中的 `file_path` 通常是相對於 json 檔案的路徑。請確保執行腳本時,`transforms.json` 與圖片資料夾(例如 `images`)的相對位置是正確的,否則腳本可能會找不到圖片。
```python
import json
import cv2
import numpy as np
import os
from pathlib import Path
import argparse
# ==============================================================================
# Configuration Area
# ==============================================================================
# Whether to crop the black borders generated after undistortion?
# True: Crop (FOV will be slightly narrower, but full frame)
# False: Keep black borders (Maximize FOV, but image edges will have black curved areas)
CROP_TO_VALID = True
# ==============================================================================
def undistort_process(json_path, output_dir, crop_to_valid):
    """Undistort every image referenced by a NeRF transforms.json and write a
    matching JSON whose intrinsics describe a linear pinhole camera.

    Args:
        json_path: Path to the input transforms.json (colmap2nerf output);
            frame ``file_path`` entries are resolved relative to this file.
        output_dir: Directory receiving ``images_undistorted/`` and
            ``transforms_undistorted.json``.
        crop_to_valid: If True, crop the black borders produced by the
            undistortion (slightly narrower FOV, but a full frame).
    """
    # Convert paths to absolute so relative inputs behave predictably
    json_path = Path(os.path.abspath(json_path))
    output_path = Path(os.path.abspath(output_dir))
    if not json_path.exists():
        print(f"Error: JSON file not found: {json_path}")
        return
    # Create output directory
    images_out_dir = output_path / "images_undistorted"
    images_out_dir.mkdir(parents=True, exist_ok=True)
    print(f"Reading JSON: {json_path}")
    with open(json_path, 'r') as f:
        data = json.load(f)
    # 1. Read camera parameters (Instant-NGP key names, with fallbacks)
    w = int(data.get("w", 1920))
    h = int(data.get("h", 1080))
    fl_x = float(data.get("fl_x", 1000))
    fl_y = float(data.get("fl_y", fl_x))  # If fl_y is missing, it usually defaults to fl_x
    cx = float(data.get("cx", w / 2))
    cy = float(data.get("cy", h / 2))
    # 2. Read distortion coefficients.
    # NOTE(review): assumes the OPENCV camera model, where k4 occupies the
    # rational-model slot. OPENCV_FISHEYE k1-k4 coefficients cannot be
    # handled by cv2.undistort — confirm the upstream COLMAP camera model.
    k1 = float(data.get("k1", 0.0))
    k2 = float(data.get("k2", 0.0))
    k3 = float(data.get("k3", 0.0))
    k4 = float(data.get("k4", 0.0))
    p1 = float(data.get("p1", 0.0))
    p2 = float(data.get("p2", 0.0))
    # Construct Camera Matrix
    K = np.array([
        [fl_x, 0, cx],
        [0, fl_y, cy],
        [0, 0, 1 ]
    ])
    # Distortion vector in OpenCV order: (k1, k2, p1, p2, k3, k4, k5, k6)
    D = np.array([k1, k2, p1, p2, k3, k4, 0.0, 0.0])
    print(f"Camera Matrix:\n{K}")
    print(f"Distortion Coeffs: {D}")
    # 3. Calculate Optimal New Camera Matrix.
    # alpha=0: crop all black borders (FOV becomes smaller)
    # alpha=1: keep all pixels (black borders remain)
    alpha = 0 if crop_to_valid else 1
    new_K, roi = cv2.getOptimalNewCameraMatrix(K, D, (w, h), alpha, (w, h))
    # ROI for cropping (x, y, w, h)
    x, y, w_roi, h_roi = roi
    # Guard against a degenerate (empty) ROI — fall back to the full frame
    # rather than writing zero-sized images.
    if crop_to_valid and (w_roi <= 0 or h_roi <= 0):
        print("Warning: empty valid-pixel ROI; disabling crop for this run.")
        crop_to_valid = False
        x, y, w_roi, h_roi = 0, 0, w, h
    # 4. Prepare new JSON data with the post-undistortion intrinsics
    new_data = data.copy()
    new_data["fl_x"] = new_K[0, 0]
    new_data["fl_y"] = new_K[1, 1]
    if crop_to_valid:
        # [FIX] Cropping moves the image origin to (x, y), so the principal
        # point must be shifted by the ROI offset; the previous code wrote
        # the uncropped values, yielding wrong intrinsics after a crop.
        new_data["cx"] = new_K[0, 2] - x
        new_data["cy"] = new_K[1, 2] - y
        new_data["w"] = w_roi
        new_data["h"] = h_roi
    else:
        new_data["cx"] = new_K[0, 2]
        new_data["cy"] = new_K[1, 2]
        new_data["w"] = w
        new_data["h"] = h
    # Zero out distortion parameters (the images are now rectilinear)
    for key in ["k1", "k2", "k3", "k4", "p1", "p2"]:
        new_data[key] = 0.0
    new_frames = []
    frames = data.get("frames", [])
    print(f"Processing {len(frames)} images...")
    # 5. Batch-process images; file_path entries are relative to the JSON file
    json_dir = Path(json_path).parent
    for idx, frame in enumerate(frames):
        rel_path = frame["file_path"]
        img_path = json_dir / rel_path
        if not img_path.exists():
            print(f"Warning: Image not found: {img_path}")
            continue
        img = cv2.imread(str(img_path))
        if img is None:
            # Unreadable/corrupt image — skip silently like a missing file
            continue
        # [Core Step] Remap to the distortion-free camera
        dst = cv2.undistort(img, K, D, None, new_K)
        # Crop to the valid region if requested
        if crop_to_valid:
            dst = dst[y:y+h_roi, x:x+w_roi]
        # Save under the output folder, keeping the original file name
        img_name = Path(rel_path).name
        save_path = images_out_dir / img_name
        cv2.imwrite(str(save_path), dst)
        # Point the frame at the new image (relative path for portability)
        new_frame = frame.copy()
        new_frame["file_path"] = f"images_undistorted/{img_name}"
        new_frames.append(new_frame)
        if idx % 20 == 0:
            print(f"Processed {idx}/{len(frames)}...")
    new_data["frames"] = new_frames
    # 6. Save new JSON
    new_json_path = output_path / "transforms_undistorted.json"
    with open(new_json_path, 'w') as f:
        json.dump(new_data, f, indent=4)
    print("Done!")
    print(f"Undistorted images saved to: {images_out_dir}")
    print(f"New JSON saved to: {new_json_path}")
    print("Use this new JSON in Houdini!")
if __name__ == "__main__":
    # Command-line entry point: crop behaviour defaults to CROP_TO_VALID
    # unless overridden by --crop / --no-crop.
    cli = argparse.ArgumentParser(description="Undistort images and transforms.json")
    cli.add_argument("--json_path", type=str, required=True, help="Path to input transforms.json")
    cli.add_argument("--output_dir", type=str, required=True, help="Path to output directory")
    cli.add_argument("--crop", dest="crop_to_valid", action="store_true", help="Crop to valid region")
    cli.add_argument("--no-crop", dest="crop_to_valid", action="store_false", help="Do not crop")
    cli.set_defaults(crop_to_valid=CROP_TO_VALID)
    opts = cli.parse_args()
    undistort_process(opts.json_path, opts.output_dir, opts.crop_to_valid)
```
### 參數說明
* `--json_path`: 輸入的 `transforms.json` 路徑 (由 colmap2nerf 產生)。
* `--output_dir`: 輸出目錄,程式將會在此建立 `images_undistorted` 資料夾與 `transforms_undistorted.json`。
* `--no-crop`: 保留黑邊,最大化視角。
---
## 5. 轉換點雲為 PLY 格式 (Step 4: Point Cloud to PLY)
若需要在 Houdini 中直接讀取點雲,可以使用 COLMAP 的 `model_converter` 將稀疏重建的點雲 (`points3D.bin`) 轉換為 `.ply` 格式。
### 用法
```bash
colmap model_converter --input_path "path\to\output\Video01\sparse\0" --output_path "path\to\output\Video01\points3D.ply" --output_type PLY
```
### 參數說明
* `--input_path`: 指向包含 `points3D.bin` 的資料夾 (通常是 `sparse\0`)。
* `--output_path`: 輸出的 `.ply` 檔案路徑。
* `--output_type`: 指定輸出格式為 `PLY`。
---
## 6. 匯入 Houdini (Step 5: Import to Houdini)
最後,在 Houdini 內讀取 `transforms_undistorted.json` 並建立帶有動畫的攝影機。
```python
import hou
import json
import os
import re
def create_animated_camera(json_path: str, global_scale: float = 1, cam_name: str = "Nerfstudio_Animated_Cam", aperture_width: float = 36.0) -> None:
    """Build an animated Houdini camera from a NeRF-style transforms JSON.

    Creates (or recreates) a camera inside an /obj/NeRF_Import subnet, sets
    one keyframe per frame on tx/ty/tz/rx/ry/rz, and adjusts the playbar to
    cover the imported range.

    Args:
        json_path: Path to transforms.json (or transforms_undistorted.json).
        global_scale: Multiplier applied to translations only.
        cam_name: Name of the camera node; an existing one is destroyed.
        aperture_width: Horizontal film aperture in mm used to convert the
            pixel focal length (35mm full-frame width by default).
    """
    # 1. Check file
    if not os.path.exists(json_path):
        hou.ui.displayMessage(f"Error: File not found at:\n{json_path}")
        return
    print(f"Loading JSON: {json_path}")
    with open(json_path, 'r') as f:
        data = json.load(f)
    # 2. Get basic information
    frames = data.get("frames", [])
    # Sort by the first number embedded in the filename so keyframes land in
    # the correct animation order (frames without a number sort as 0)
    def get_frame_num(frame_data):
        fname = os.path.basename(frame_data['file_path'])
        match = re.search(r'(\d+)', fname)
        return int(match.group(1)) if match else 0
    frames.sort(key=get_frame_num)
    if not frames:
        print("No frames found in JSON.")
        return
    # Read resolution and focal length
    img_w = float(data.get("w", 1920))
    img_h = float(data.get("h", 1080))
    fl_x = float(data.get("fl_x", 1000))  # Focal Length in Pixels
    # Convert pixel focal length to Houdini focal length (mm) via the aperture
    focal_mm = (fl_x / img_w) * aperture_width
    # 3. Create Houdini nodes (reuse the subnet if it already exists)
    obj = hou.node("/obj")
    subnet = obj.node("NeRF_Import")
    if not subnet:
        subnet = obj.createNode("subnet", "NeRF_Import")
    # Create camera (destroy and recreate if it already exists)
    cam = subnet.node(cam_name)
    if cam:
        cam.destroy()
    cam = subnet.createNode("cam", cam_name)
    print(f"Creating animation for {len(frames)} frames...")
    # Set static camera parameters
    cam.parm("resx").set(img_w)
    cam.parm("resy").set(img_h)
    cam.parm("aperture").set(aperture_width)
    cam.parm("focal").set(focal_mm)
    cam.parm("iconscale").set(0.5)
    # 4. Coordinate correction matrix — currently the identity.
    # NOTE(review): placeholder for a Z-up -> Y-up fix-up; confirm whether
    # the source coordinate system actually needs an axis conversion.
    correction_rot = hou.hmath.buildRotate(0, 0, 0)
    # 5. Process animation keyframes (grouped so one undo removes all of them)
    with hou.undos.group("Import Nerfstudio Camera"):
        for frame_data in frames:
            # Get Frame Number
            f_num = get_frame_num(frame_data)
            # Read matrix — may be nested 4x4 lists or an already-flat list
            raw_mtx = frame_data["transform_matrix"]
            if isinstance(raw_mtx[0], list):
                flat_mtx = [item for sublist in raw_mtx for item in sublist]
            else:
                flat_mtx = raw_mtx
            # Convert to Houdini Matrix4
            h_mtx = hou.Matrix4(tuple(flat_mtx))
            # Transpose matrix (Column-Major -> Row-Major for Houdini's
            # row-vector convention)
            h_mtx = h_mtx.transposed()
            # Apply coordinate correction (identity for now, see step 4)
            final_mtx = h_mtx * correction_rot
            # Extract transform data
            tra = final_mtx.extractTranslates()
            rot = final_mtx.extractRotates()
            # Apply global scene scaling to the translation channels only
            tx = tra[0] * global_scale
            ty = tra[1] * global_scale
            tz = tra[2] * global_scale
            rx, ry, rz = rot
            # Set one linear keyframe per transform channel
            target_parms = ["tx", "ty", "tz", "rx", "ry", "rz"]
            values = [tx, ty, tz, rx, ry, rz]
            for p_name, val in zip(target_parms, values):
                k = hou.Keyframe()
                k.setFrame(f_num)
                k.setValue(val)
                k.setExpression("linear()")
                cam.parm(p_name).setKeyframe(k)
    # 6. Set scene range to span the imported animation
    start_frame = get_frame_num(frames[0])
    end_frame = get_frame_num(frames[-1])
    hou.playbar.setFrameRange(start_frame, end_frame)
    hou.playbar.setPlaybackRange(start_frame, end_frame)
    hou.setFrame(start_frame)
    subnet.layoutChildren()
    print(f"Success! Animated camera created at: {cam.path()}")
```