"""Interrupt-driven test of a matrix-multiply accelerator on a PYNQ overlay.

For each test size the script fills two random square matrices, hands their
physical addresses to the accelerator, starts it, and lets a completion
callback compare the hardware result against a NumPy reference while the
main thread keeps doing other work.
"""

import time

import numpy as np
from pynq import Overlay, allocate, Interrupt
from pynq.lib import RegisterEvent

# NOTE(review): `pynq.lib.RegisterEvent` and `Interrupt.IRQ_EVENT` could not be
# confirmed against the published PYNQ API, whose documented mechanism is the
# `await ip.interrupt.wait()` coroutine. The calls below are kept exactly as
# the original wrote them -- confirm they exist in the PYNQ build in use.

# Register offsets, named after how this script uses them.
# NOTE(review): the layout looks like a Vitis HLS s_axilite control interface;
# if so, the interrupt output only asserts once GIE (0x04) and IER (0x08) are
# enabled, which this script never does -- confirm against the IP's register map.
REG_CTRL = 0x00      # written with CTRL_START to launch the accelerator
REG_A_ADDR = 0x10    # physical address of operand A
REG_B_ADDR = 0x18    # physical address of operand B
REG_C_ADDR = 0x20    # physical address of the result buffer
REG_SIZE = 0x28      # matrix dimension (matrices are size x size)
CTRL_START = 0x01

MIN_SIZE = 3
MAX_SIZE = 256

# Load the bitstream of the finished design.
overlay = Overlay("matrix.bit")

# Get the IP object (rename to match the actual design).
ip = overlay.matrix_0

# Module-level view of the most recent run, kept for backward compatibility.
# The completion callback no longer reads these: each run captures its own
# state, so a late interrupt cannot be checked against a newer run's data.
C_buf = None
expected = None
size_glob = 0

# Strong references to everything belonging to a run that has been started
# but not yet reported. Without this, the input buffers and the event object
# would become unreachable as soon as the launching function returns, while
# the accelerator may still be reading from them.
_in_flight = {}


def run_matrix_multiply_interrupt(size):
    """Start one accelerated ``size x size`` multiply and return immediately.

    Allocates three buffers, fills A and B with random integers in [0, 10),
    programs the IP registers, registers a completion callback and starts the
    accelerator. The callback prints PASS or FAIL after comparing the
    hardware output with the NumPy product of the same operands.

    Args:
        size: Matrix dimension; must satisfy ``3 <= size <= 256``.

    Raises:
        AssertionError: If ``size`` is outside the supported range.
    """
    global C_buf, expected, size_glob

    # Explicit raise instead of `assert` so the check survives `python -O`;
    # exception type and message are the same as before.
    if not MIN_SIZE <= size <= MAX_SIZE:
        raise AssertionError("size out of range!")

    num_elems = size * size

    # 1) Allocate the buffers and fill them in one vectorized step each.
    a_buf = allocate(shape=(num_elems,), dtype=np.int32)
    b_buf = allocate(shape=(num_elems,), dtype=np.int32)
    c_buf = allocate(shape=(num_elems,), dtype=np.int32)
    a_buf[:] = np.random.randint(0, 10, size=num_elems, dtype=np.int32)
    b_buf[:] = np.random.randint(0, 10, size=num_elems, dtype=np.int32)
    c_buf[:] = 0

    # NumPy reference, computed on plain ndarray views so the result does not
    # depend on the buffer subclass.
    a_mat = np.asarray(a_buf).reshape(size, size)
    b_mat = np.asarray(b_buf).reshape(size, size)
    reference = a_mat.dot(b_mat)

    # Publish the latest run through the legacy globals.
    C_buf = c_buf
    expected = reference
    size_glob = size

    # Push CPU-side writes out to memory before the hardware reads them. The
    # zeroed result buffer is synced too, so a later write-back of stale
    # zeros cannot overwrite hardware output.
    a_buf.sync_to_device()
    b_buf.sync_to_device()
    c_buf.sync_to_device()

    # 2) Program the operand addresses and the matrix size.
    ip.write(REG_A_ADDR, a_buf.physical_address)
    ip.write(REG_B_ADDR, b_buf.physical_address)
    ip.write(REG_C_ADDR, c_buf.physical_address)
    ip.write(REG_SIZE, size)

    # 3) Register the interrupt callback.
    irq_event = RegisterEvent(ip.interrupt, event=Interrupt.IRQ_EVENT)
    irq_event.clear()

    run_token = object()

    def interrupt_callback():
        irq_event.clear()
        # Make sure the CPU sees what the accelerator wrote.
        c_buf.sync_from_device()
        hw_result = np.asarray(c_buf).reshape(size, size)
        if np.array_equal(hw_result, reference):
            print(f"[SIZE={size}] ✅ PASS")
        else:
            print(f"[SIZE={size}] ❌ FAIL")
        # The run is finished; release the keep-alive references.
        _in_flight.pop(run_token, None)

    irq_event.register_callback(interrupt_callback)
    _in_flight[run_token] = (irq_event, a_buf, b_buf, c_buf)

    # 4) Start the accelerator.
    ip.write(REG_CTRL, CTRL_START)
    print(f"[SIZE={size}] ➤ Accelerator started, CPU continues working...")


# --------------------------------------------------------------------
# Example test: 3x3, 10x10, 128x128, 256x256
# --------------------------------------------------------------------
for test_size in [3, 10, 128, 256]:
    print(f"Testing matrix multiply: {test_size}x{test_size}")
    start_t = time.time()

    run_matrix_multiply_interrupt(test_size)

    # Simulate the CPU doing other work (a delay here, for observation).
    for i in range(5):
        print(f"🧠 CPU doing other work... {i}")
        time.sleep(0.2)

    end_t = time.time()
    print(f"Time cost (main thread): {end_t - start_t:.4f} s\n")