# Automated Camera Tracking Workflow (COLMAP + GLOMAP)
本文件說明如何使用 Python 進行影片的自動化相機追蹤與 NeRF 格式轉換。
## 1. 環境準備 (Prerequisites)
在執行腳本之前,請確保以下軟體已安裝並設定於系統環境變數 (System PATH) 中,以便腳本可以直接呼叫命令。
### 必備軟體
1. **FFmpeg**
* 用途:將影片拆解為圖片序列。
* 下載:FFmpeg 官網
* 設定:將 `bin` 資料夾路徑加入 PATH (例如 `C:\ffmpeg\bin`)。
* 驗證:在終端機輸入 `ffmpeg -version`。
2. **COLMAP** (建議使用 CUDA 版本)
* 用途:特徵提取 (Feature Extraction) 與 序列匹配 (Sequential Matching)。
* 下載:COLMAP GitHub
* 設定:將包含 `colmap.exe` 的資料夾路徑加入 PATH。
* 驗證:在終端機輸入 `colmap --help`。
3. **GLOMAP**
* 用途:全域稀疏重建 (Global Sparse Reconstruction),比傳統 COLMAP Mapper 更快且對影片序列更穩健。
* 下載:GLOMAP GitHub
* 設定:將包含 `glomap.exe` 的資料夾路徑加入 PATH。
* 驗證:在終端機輸入 `glomap --help`。
4. **Python Libraries**
* 請確保安裝必要的 Python 套件:
```bash
pip install numpy opencv-python
```
---
## 2. 自動化追蹤 (Step 1: AutoTracker)
此腳本會批次處理影片資料夾,自動執行:拆幀 -> 特徵提取 -> 序列匹配 -> GLOMAP 重建 -> 轉換模型為 TXT 格式。
```python
import os
import sys
import subprocess
import glob
import argparse
# System Binaries (Ensure these are in your PATH)
FFMPEG = "ffmpeg"
COLMAP = "colmap"
GLOMAP = "glomap"
def run_command(cmd, error_msg, quiet=False):
    """Execute an external command and report success.

    Args:
        cmd: Argument vector handed to ``subprocess.run``.
        error_msg: Message printed when the command fails or cannot be found.
        quiet: When True, discard the child's stdout and stderr.

    Returns:
        True if the command exited with status 0, False otherwise.
    """
    # stdout/stderr default to None (inherit); DEVNULL silences the child.
    sink = subprocess.DEVNULL if quiet else None
    try:
        subprocess.run(cmd, check=True, stdout=sink, stderr=sink)
    except subprocess.CalledProcessError:
        print(error_msg)
        return False
    except FileNotFoundError:
        # Binary is not on PATH — tell the user which one.
        print(f" [ERROR] Binary not found: {cmd[0]}")
        print(error_msg)
        return False
    return True
def process_video(video_path: str, scenes_dir: str, idx: int, total: int, overlap: int = 12, scale: float = 1.0) -> None:
    """Run the full tracking pipeline on a single video.

    Pipeline: extract frames with FFmpeg -> COLMAP feature extraction ->
    COLMAP sequential matching -> GLOMAP sparse reconstruction -> export the
    model as TXT (both inside sparse/0 and in sparse/).

    Args:
        video_path: Path to the input video file.
        scenes_dir: Root directory that receives one sub-folder per video.
        idx: 1-based index of this video (progress display only).
        total: Total number of videos (progress display only).
        overlap: Value for COLMAP's SequentialMatching.overlap option.
        scale: Uniform image scale factor applied during frame extraction.
    """
    # Get base name and extension
    base_name = os.path.splitext(os.path.basename(video_path))[0]
    ext = os.path.splitext(video_path)[1]
    print(f"\n[{idx}/{total}] === Processing \"{base_name}{ext}\" ===")
    # Directory layout: <scenes_dir>/<video name>/{images, sparse, database.db}
    scene_path = os.path.join(scenes_dir, base_name)
    img_dir = os.path.join(scene_path, "images")
    sparse_dir = os.path.join(scene_path, "sparse")
    database_path = os.path.join(scene_path, "database.db")
    # Skip if already reconstructed.
    # NOTE(review): this only tests that the scene folder exists, so a
    # previously *failed* run is also skipped — delete the folder to retry.
    if os.path.exists(scene_path):
        print(f" • Skipping \"{base_name}\" – already reconstructed.")
        return
    # Create the working directories for a fresh run
    try:
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(sparse_dir, exist_ok=True)
    except OSError as e:
        print(f" [ERROR] Could not create directories: {e}")
        return
    # 1) Extract every frame (-qscale:v 2 = high JPEG quality)
    print(" [1/4] Extracting frames ...")
    frame_pattern = os.path.join(img_dir, "frame_%06d.jpg")
    cmd_ffmpeg = [
        FFMPEG, "-loglevel", "error", "-stats", "-i", video_path,
        "-qscale:v", "2"
    ]
    if scale != 1.0:
        # Uniform down/up-scaling of both dimensions via an FFmpeg filter
        cmd_ffmpeg.extend(["-vf", f"scale=iw*{scale}:ih*{scale}"])
    cmd_ffmpeg.append(frame_pattern)
    if not run_command(cmd_ffmpeg, f" × FFmpeg failed – skipping \"{base_name}\"."):
        return
    # Defensive check: verify that frames actually landed on disk
    if not glob.glob(os.path.join(img_dir, "*.jpg")):
        print(f" × No frames extracted – skipping \"{base_name}\".")
        return
    # 2) Feature extraction (COLMAP) — one shared camera model, GPU SIFT
    print(" [2/4] COLMAP feature_extractor ...")
    cmd_colmap_fe = [
        COLMAP, "feature_extractor",
        "--database_path", database_path,
        "--image_path", img_dir,
        "--ImageReader.single_camera", "1",
        "--SiftExtraction.use_gpu", "1"
    ]
    if not run_command(cmd_colmap_fe, f" × feature_extractor failed – skipping \"{base_name}\"."):
        return
    # 3) Sequential matching (COLMAP) — matches neighbouring frames, suited to video
    print(" [3/4] COLMAP sequential_matcher ...")
    cmd_colmap_sm = [
        COLMAP, "sequential_matcher",
        "--database_path", database_path,
        "--SequentialMatching.overlap", str(overlap)
    ]
    if not run_command(cmd_colmap_sm, f" × sequential_matcher failed – skipping \"{base_name}\"."):
        return
    # 4) Sparse reconstruction (GLOMAP)
    print(" [4/4] GLOMAP mapper ...")
    cmd_glomap = [
        GLOMAP, "mapper",
        "--database_path", database_path,
        "--image_path", img_dir,
        "--output_path", sparse_dir
    ]
    if not run_command(cmd_glomap, f" × glomap mapper failed – skipping \"{base_name}\"."):
        return
    # Export TXT inside the model folder.
    # Keep TXT next to BIN so Blender can import from sparse\0 directly.
    sparse_0_dir = os.path.join(sparse_dir, "0")
    if os.path.exists(sparse_0_dir):
        cmd_convert_1 = [
            COLMAP, "model_converter",
            "--input_path", sparse_0_dir,
            "--output_path", sparse_0_dir,
            "--output_type", "TXT"
        ]
        # TXT export failures are non-fatal: the BIN model is still usable
        run_command(cmd_convert_1, " [WARN] Failed to export TXT to sparse/0", quiet=True)
        # Also export TXT to the parent sparse\ folder (for Blender auto-detect)
        cmd_convert_2 = [
            COLMAP, "model_converter",
            "--input_path", sparse_0_dir,
            "--output_path", sparse_dir,
            "--output_type", "TXT"
        ]
        run_command(cmd_convert_2, " [WARN] Failed to export TXT to sparse/", quiet=True)
    print(f" ✓ Finished \"{base_name}\" ({idx}/{total})")
def main():
    """CLI entry point: run the GLOMAP pipeline over every file in a folder.

    Usage: script.py videos_dir scenes_dir [--overlap N] [--scale F]

    Exits non-zero when the input folder is missing or arguments are absent.
    """
    parser = argparse.ArgumentParser(description="Batch script for automated photogrammetry tracking workflow.")
    parser.add_argument("videos_dir", help="Directory containing input videos")
    parser.add_argument("scenes_dir", help="Directory to output scenes")
    parser.add_argument("--overlap", type=int, default=12, help="Sequential matching overlap (default: 12)")
    parser.add_argument("--scale", type=float, default=1.0, help="Image scaling factor (default: 1.0)")
    # Show full usage (instead of a terse argparse error) when run bare
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)
    args = parser.parse_args()
    videos_dir = os.path.abspath(args.videos_dir)
    scenes_dir = os.path.abspath(args.scenes_dir)
    # Ensure required folders exist
    if not os.path.isdir(videos_dir):
        print(f"[ERROR] Input folder \"{videos_dir}\" missing.")
        input("Press Enter to exit...")
        sys.exit(1)
    try:
        os.makedirs(scenes_dir, exist_ok=True)
    except OSError as e:
        print(f"[ERROR] Could not create output folder \"{scenes_dir}\": {e}")
        sys.exit(1)
    # Collect regular files only. Sorted so batches run in a deterministic
    # order (os.listdir order is filesystem-dependent).
    # NOTE(review): no extension filter — a non-video file here will simply
    # fail at the FFmpeg step and be skipped by process_video.
    video_files = sorted(
        f for f in os.listdir(videos_dir)
        if os.path.isfile(os.path.join(videos_dir, f))
    )
    total = len(video_files)
    if total == 0:
        print(f"[INFO] No video files found in \"{videos_dir}\".")
        input("Press Enter to exit...")
        sys.exit(0)
    print("==============================================================")
    print(f" Starting GLOMAP pipeline on {total} video(s) ...")
    print("==============================================================")
    for idx, video_file in enumerate(video_files, 1):
        process_video(os.path.join(videos_dir, video_file), scenes_dir, idx, total, overlap=args.overlap, scale=args.scale)
    print("--------------------------------------------------------------")
    print(f" All jobs finished – results are in \"{scenes_dir}\".")
    print("--------------------------------------------------------------")
    input("Press Enter to exit...")
if __name__ == "__main__":
    main()
```
### 參數說明
* `videos_dir` (必要): 包含 `.mp4`, `.mov` 等影片檔的資料夾。
* `scenes_dir` (必要): 輸出結果的資料夾。
* `--overlap`: (選填, 預設 12) 序列匹配的重疊張數。
* 動作快或幀率低的影片可嘗試調高 (例如 20)。
* `--scale`: (選填, 預設 1.0) 圖片縮放比例。
* `1.0`: 原始解析度。
* `0.5`: 長寬各縮小一半 (速度較快)。
---
## 3. 轉換為 NeRF 格式 (Step 2: colmap2nerf)
若需要 `transforms.json` (NeRF/Instant-NGP 格式),可使用 `colmap2nerf.py` 讀取上一步驟產生的 COLMAP TXT 資料。
此腳本源自 Instant-NGP GitHub。
注意:建議先安裝好 Nerfstudio,以確保相關 Python 依賴庫完整。
由於之前已經完成了重建並輸出了 TXT 檔在 `sparse\0` 資料夾中,我們不需要加 `--run_colmap` 參數。
### 關鍵參數
* `--colmap_db`: 指向產生的 `database.db`。
* `--images`: 指向產生的 `images` 資料夾。
* `--text`: 指向產生的 `sparse\0` 資料夾 (裡面包含 cameras.txt, images.txt, points3D.txt)。
* `--keep_colmap_coords`: 保持 COLMAP 原始座標系 (不進行自動置中與旋轉),這對於後續 VFX 流程整合非常重要。
* `--out`: 輸出的 json 檔案路徑。
---
## 4. 鏡頭校正與去畸變 (Step 3: Undistortion)
使用以下腳本將圖片進行去畸變 (Undistortion),並同步校正 `transforms.json` 中的相機參數 (Intrinsics),使其適用於 Houdini 或其他需要 Linear Pinhole Camera 的軟體。
> **注意:** `transforms.json` 中的 `file_path` 通常是相對於 json 檔案的路徑。請確保執行腳本時,`transforms.json` 與圖片資料夾(例如 `images`)的相對位置是正確的,否則腳本可能會找不到圖片。
```python
import json
import cv2
import numpy as np
import os
from pathlib import Path
import argparse
# ==============================================================================
# Configuration Area
# ==============================================================================
# Whether to crop the black borders generated after undistortion?
# True: Crop (FOV will be slightly narrower, but full frame)
# False: Keep black borders (Maximize FOV, but image edges will have black curved areas)
CROP_TO_VALID = True
# ==============================================================================
def undistort_process(json_path, output_dir, crop_to_valid):
    """Undistort every image referenced by a NeRF transforms.json and write a
    matching JSON whose intrinsics describe a linear pinhole camera.

    Args:
        json_path: Path to the input transforms.json (colmap2nerf output);
            frame ``file_path`` entries are resolved relative to this file.
        output_dir: Directory receiving ``images_undistorted/`` and
            ``transforms_undistorted.json``.
        crop_to_valid: If True, crop the black borders produced by the
            undistortion (slightly narrower FOV, but a full frame).
    """
    # Convert paths to absolute so relative inputs behave predictably
    json_path = Path(os.path.abspath(json_path))
    output_path = Path(os.path.abspath(output_dir))
    if not json_path.exists():
        print(f"Error: JSON file not found: {json_path}")
        return
    # Create output directory
    images_out_dir = output_path / "images_undistorted"
    images_out_dir.mkdir(parents=True, exist_ok=True)
    print(f"Reading JSON: {json_path}")
    with open(json_path, 'r') as f:
        data = json.load(f)
    # 1. Read camera parameters (Instant-NGP key names, with fallbacks)
    w = int(data.get("w", 1920))
    h = int(data.get("h", 1080))
    fl_x = float(data.get("fl_x", 1000))
    fl_y = float(data.get("fl_y", fl_x))  # If fl_y is missing, it usually defaults to fl_x
    cx = float(data.get("cx", w / 2))
    cy = float(data.get("cy", h / 2))
    # 2. Read distortion coefficients.
    # NOTE(review): assumes the OPENCV camera model, where k4 occupies the
    # rational-model slot. OPENCV_FISHEYE k1-k4 coefficients cannot be
    # handled by cv2.undistort — confirm the upstream COLMAP camera model.
    k1 = float(data.get("k1", 0.0))
    k2 = float(data.get("k2", 0.0))
    k3 = float(data.get("k3", 0.0))
    k4 = float(data.get("k4", 0.0))
    p1 = float(data.get("p1", 0.0))
    p2 = float(data.get("p2", 0.0))
    # Construct Camera Matrix
    K = np.array([
        [fl_x, 0, cx],
        [0, fl_y, cy],
        [0, 0, 1 ]
    ])
    # Distortion vector in OpenCV order: (k1, k2, p1, p2, k3, k4, k5, k6)
    D = np.array([k1, k2, p1, p2, k3, k4, 0.0, 0.0])
    print(f"Camera Matrix:\n{K}")
    print(f"Distortion Coeffs: {D}")
    # 3. Calculate Optimal New Camera Matrix.
    # alpha=0: crop all black borders (FOV becomes smaller)
    # alpha=1: keep all pixels (black borders remain)
    alpha = 0 if crop_to_valid else 1
    new_K, roi = cv2.getOptimalNewCameraMatrix(K, D, (w, h), alpha, (w, h))
    # ROI for cropping (x, y, w, h)
    x, y, w_roi, h_roi = roi
    # Guard against a degenerate (empty) ROI — fall back to the full frame
    # rather than writing zero-sized images.
    if crop_to_valid and (w_roi <= 0 or h_roi <= 0):
        print("Warning: empty valid-pixel ROI; disabling crop for this run.")
        crop_to_valid = False
        x, y, w_roi, h_roi = 0, 0, w, h
    # 4. Prepare new JSON data with the post-undistortion intrinsics
    new_data = data.copy()
    new_data["fl_x"] = new_K[0, 0]
    new_data["fl_y"] = new_K[1, 1]
    if crop_to_valid:
        # [FIX] Cropping moves the image origin to (x, y), so the principal
        # point must be shifted by the ROI offset; the previous code wrote
        # the uncropped values, yielding wrong intrinsics after a crop.
        new_data["cx"] = new_K[0, 2] - x
        new_data["cy"] = new_K[1, 2] - y
        new_data["w"] = w_roi
        new_data["h"] = h_roi
    else:
        new_data["cx"] = new_K[0, 2]
        new_data["cy"] = new_K[1, 2]
        new_data["w"] = w
        new_data["h"] = h
    # Zero out distortion parameters (the images are now rectilinear)
    for key in ["k1", "k2", "k3", "k4", "p1", "p2"]:
        new_data[key] = 0.0
    new_frames = []
    frames = data.get("frames", [])
    print(f"Processing {len(frames)} images...")
    # 5. Batch-process images; file_path entries are relative to the JSON file
    json_dir = Path(json_path).parent
    for idx, frame in enumerate(frames):
        rel_path = frame["file_path"]
        img_path = json_dir / rel_path
        if not img_path.exists():
            print(f"Warning: Image not found: {img_path}")
            continue
        img = cv2.imread(str(img_path))
        if img is None:
            # Unreadable/corrupt image — skip silently like a missing file
            continue
        # [Core Step] Remap to the distortion-free camera
        dst = cv2.undistort(img, K, D, None, new_K)
        # Crop to the valid region if requested
        if crop_to_valid:
            dst = dst[y:y+h_roi, x:x+w_roi]
        # Save under the output folder, keeping the original file name
        img_name = Path(rel_path).name
        save_path = images_out_dir / img_name
        cv2.imwrite(str(save_path), dst)
        # Point the frame at the new image (relative path for portability)
        new_frame = frame.copy()
        new_frame["file_path"] = f"images_undistorted/{img_name}"
        new_frames.append(new_frame)
        if idx % 20 == 0:
            print(f"Processed {idx}/{len(frames)}...")
    new_data["frames"] = new_frames
    # 6. Save new JSON
    new_json_path = output_path / "transforms_undistorted.json"
    with open(new_json_path, 'w') as f:
        json.dump(new_data, f, indent=4)
    print("Done!")
    print(f"Undistorted images saved to: {images_out_dir}")
    print(f"New JSON saved to: {new_json_path}")
    print("Use this new JSON in Houdini!")
if __name__ == "__main__":
    # Command-line entry point: crop behaviour defaults to CROP_TO_VALID
    # unless overridden by --crop / --no-crop.
    cli = argparse.ArgumentParser(description="Undistort images and transforms.json")
    cli.add_argument("--json_path", type=str, required=True, help="Path to input transforms.json")
    cli.add_argument("--output_dir", type=str, required=True, help="Path to output directory")
    cli.add_argument("--crop", dest="crop_to_valid", action="store_true", help="Crop to valid region")
    cli.add_argument("--no-crop", dest="crop_to_valid", action="store_false", help="Do not crop")
    cli.set_defaults(crop_to_valid=CROP_TO_VALID)
    opts = cli.parse_args()
    undistort_process(opts.json_path, opts.output_dir, opts.crop_to_valid)
```
### 參數說明
* `--json_path`: 輸入的 `transforms.json` 路徑 (由 colmap2nerf 產生)。
* `--output_dir`: 輸出目錄,程式將會在此建立 `images_undistorted` 資料夾與 `transforms_undistorted.json`。
* `--no-crop`: 保留黑邊,最大化視角。
---
## 5. 轉換點雲為 PLY 格式 (Step 4: Point Cloud to PLY)
若需要在 Houdini 中直接讀取點雲,可以使用 COLMAP 的 `model_converter` 將稀疏重建的點雲 (`points3D.bin`) 轉換為 `.ply` 格式。
### 用法
```bash
colmap model_converter --input_path "path\to\output\Video01\sparse\0" --output_path "path\to\output\Video01\points3D.ply" --output_type PLY
```
### 參數說明
* `--input_path`: 指向包含 `points3D.bin` 的資料夾 (通常是 `sparse\0`)。
* `--output_path`: 輸出的 `.ply` 檔案路徑。
* `--output_type`: 指定輸出格式為 `PLY`。
---
## 6. 匯入 Houdini (Step 5: Import to Houdini)
最後,在 Houdini 內讀取 `transforms_undistorted.json` 並建立帶有動畫的攝影機。
```python
import hou
import json
import os
import re
def create_animated_camera(json_path: str, global_scale: float = 1, cam_name: str = "Nerfstudio_Animated_Cam", aperture_width: float = 36.0) -> None:
    """Build an animated Houdini camera from a NeRF-style transforms JSON.

    Creates (or recreates) a camera inside an /obj/NeRF_Import subnet, sets
    one keyframe per frame on tx/ty/tz/rx/ry/rz, and adjusts the playbar to
    cover the imported range.

    Args:
        json_path: Path to transforms.json (or transforms_undistorted.json).
        global_scale: Multiplier applied to translations only.
        cam_name: Name of the camera node; an existing one is destroyed.
        aperture_width: Horizontal film aperture in mm used to convert the
            pixel focal length (35mm full-frame width by default).
    """
    # 1. Check file
    if not os.path.exists(json_path):
        hou.ui.displayMessage(f"Error: File not found at:\n{json_path}")
        return
    print(f"Loading JSON: {json_path}")
    with open(json_path, 'r') as f:
        data = json.load(f)
    # 2. Get basic information
    frames = data.get("frames", [])
    # Sort by the first number embedded in the filename so keyframes land in
    # the correct animation order (frames without a number sort as 0)
    def get_frame_num(frame_data):
        fname = os.path.basename(frame_data['file_path'])
        match = re.search(r'(\d+)', fname)
        return int(match.group(1)) if match else 0
    frames.sort(key=get_frame_num)
    if not frames:
        print("No frames found in JSON.")
        return
    # Read resolution and focal length
    img_w = float(data.get("w", 1920))
    img_h = float(data.get("h", 1080))
    fl_x = float(data.get("fl_x", 1000))  # Focal Length in Pixels
    # Convert pixel focal length to Houdini focal length (mm) via the aperture
    focal_mm = (fl_x / img_w) * aperture_width
    # 3. Create Houdini nodes (reuse the subnet if it already exists)
    obj = hou.node("/obj")
    subnet = obj.node("NeRF_Import")
    if not subnet:
        subnet = obj.createNode("subnet", "NeRF_Import")
    # Create camera (destroy and recreate if it already exists)
    cam = subnet.node(cam_name)
    if cam:
        cam.destroy()
    cam = subnet.createNode("cam", cam_name)
    print(f"Creating animation for {len(frames)} frames...")
    # Set static camera parameters
    cam.parm("resx").set(img_w)
    cam.parm("resy").set(img_h)
    cam.parm("aperture").set(aperture_width)
    cam.parm("focal").set(focal_mm)
    cam.parm("iconscale").set(0.5)
    # 4. Coordinate correction matrix — currently the identity.
    # NOTE(review): placeholder for a Z-up -> Y-up fix-up; confirm whether
    # the source coordinate system actually needs an axis conversion.
    correction_rot = hou.hmath.buildRotate(0, 0, 0)
    # 5. Process animation keyframes (grouped so one undo removes all of them)
    with hou.undos.group("Import Nerfstudio Camera"):
        for frame_data in frames:
            # Get Frame Number
            f_num = get_frame_num(frame_data)
            # Read matrix — may be nested 4x4 lists or an already-flat list
            raw_mtx = frame_data["transform_matrix"]
            if isinstance(raw_mtx[0], list):
                flat_mtx = [item for sublist in raw_mtx for item in sublist]
            else:
                flat_mtx = raw_mtx
            # Convert to Houdini Matrix4
            h_mtx = hou.Matrix4(tuple(flat_mtx))
            # Transpose matrix (Column-Major -> Row-Major for Houdini's
            # row-vector convention)
            h_mtx = h_mtx.transposed()
            # Apply coordinate correction (identity for now, see step 4)
            final_mtx = h_mtx * correction_rot
            # Extract transform data
            tra = final_mtx.extractTranslates()
            rot = final_mtx.extractRotates()
            # Apply global scene scaling to the translation channels only
            tx = tra[0] * global_scale
            ty = tra[1] * global_scale
            tz = tra[2] * global_scale
            rx, ry, rz = rot
            # Set one linear keyframe per transform channel
            target_parms = ["tx", "ty", "tz", "rx", "ry", "rz"]
            values = [tx, ty, tz, rx, ry, rz]
            for p_name, val in zip(target_parms, values):
                k = hou.Keyframe()
                k.setFrame(f_num)
                k.setValue(val)
                k.setExpression("linear()")
                cam.parm(p_name).setKeyframe(k)
    # 6. Set scene range to span the imported animation
    start_frame = get_frame_num(frames[0])
    end_frame = get_frame_num(frames[-1])
    hou.playbar.setFrameRange(start_frame, end_frame)
    hou.playbar.setPlaybackRange(start_frame, end_frame)
    hou.setFrame(start_frame)
    subnet.layoutChildren()
    print(f"Success! Animated camera created at: {cam.path()}")
```