# 【4】高階函式與設計 contributed by AgainTW --- # 章節 * [【1】Scala 和 Chisel 語法簡記](https://hackmd.io/@nfUUgsYRTGy81y5d9AYOyg/BJdW9obUa) * [【2】組合電路、序向電路和 Control Flow](https://hackmd.io/@nfUUgsYRTGy81y5d9AYOyg/HyWJBxmUa) * [【3】Generators](https://hackmd.io/@nfUUgsYRTGy81y5d9AYOyg/SJZ7kz7L6) * [【4】高階函式與設計](https://hackmd.io/@nfUUgsYRTGy81y5d9AYOyg/rk0Ckf7Lp) * [【5】物件導向設計](https://hackmd.io/@nfUUgsYRTGy81y5d9AYOyg/B1hB1aTLT) * [【6】Generators: Types](https://hackmd.io/@nfUUgsYRTGy81y5d9AYOyg/SJ8tka6U6) * [【7】FIRRTL 簡介](https://hackmd.io/@nfUUgsYRTGy81y5d9AYOyg/HJOjkppIT) * [【8】Chisel 到 Verilog 的中間表示](https://hackmd.io/@nfUUgsYRTGy81y5d9AYOyg/r1URy6aIT) --- # Outline 1. Chisel Standard Library 2. 高階函數 3. 自定義函數 --- # 名詞解釋 * [backpressure mechanism](https://www.intel.com/content/www/us/en/programmable/quartushelp/17.0/reference/glossary/def_avalon_backpressure.htm): 處理資料產生速度快於資料消耗速度的機制 * AMBA: Advanced Micro-controller Bus Architecture,高級微控制器匯流排架構 * 一種開放標準片上互連規範,用於連接和管理系統上的功能塊 * 是用於 ARM 架構下系統晶片(SoC)設計中的一種匯流排架構,由安謀國際科技於 1996 年開發 * 促進一次成功開發具有一個或多個 CPU、GPU 或訊號處理器的嵌入式微控制器產品, * 技術獨立,允許在不同的 IC 製程中重複使用 IP 核心、週邊和系統宏單元, * 鼓勵模組化系統設計以提高處理器獨立性,並開發可重複使用的周邊和系統 IP 庫 * 最大限度地減少矽基礎設施,同時支援高性能和低功耗片上通訊 --- # Chisel Standard Library * 目的: 為常用硬體模組提供標準介面庫(鼓勵 RTL 的互通性),例如 AXI4 介面 ## DecoupledIO * 提供了**雙向**的流量控制機制,包括 backpressure mechanism * 資料類型是可配置的 * ready 和 valid 不耦合 * ready 僅取決於接收器是否能夠接收數據 * valid 僅取決於來源是否有數據 * ready 和 valid 耦合可能會導致無法合成(unsynthesizable)的組合電路迴圈 * 說明: * 發送端 * bits: 發送端資料線 * valid: 資料準備好時,拉高電位 * 接收端: * ready: 準備好接收資料時,拉高電位 * 當 valid 和 ready 都拉高電位,進行一次傳輸 * 在事務處理之後(在下一個時鐘週期),值才會更新 * 語法 ```scala= val myChiselData = UInt(8.W) // or any Chisel data type, such as Bool(), SInt(...), or even custom Bundles val myDecoupled_1 = Decoupled(myChiselData) val myDecoupled_2 = Decoupled(UInt(8.W)) ``` * 範例 * 沒啥實質用途,就是展示一下方向關係 ```scala= class Decoupled_test extends Module { val io = IO(new Bundle { val in = Flipped(Decoupled(UInt(8.W))) val out = Decoupled(UInt(8.W)) }) io.out.valid := io.out.ready 
io.in.ready := io.in.valid io.out.bits := io.in.bits } println(getVerilog(new Decoupled_test)) ``` ```mips= module Decoupled_test( input clock, input reset, output io_in_ready, input io_in_valid, input [7:0] io_in_bits, input io_out_ready, output io_out_valid, output [7:0] io_out_bits ); assign io_in_ready = io_in_valid; // @[cmd23.sc 7:17] assign io_out_valid = io_out_ready; // @[cmd23.sc 6:18] assign io_out_bits = io_in_bits; // @[cmd23.sc 8:17] endmodule ``` ## Queue * 建立一個 FIFO 佇列 * 資料類型和元素數量都是可配置的 * 兩側具有解耦接口 * 所以需要搭配對耦接口使用 * 允許背壓 * ```<>```: ["【2】組合電路、序向電路和 Control Flow"](https://hackmd.io/@nfUUgsYRTGy81y5d9AYOyg/HyWJBxmUa)有提到 * 範例: * 可以發現使用 Queue 會產生一個 Queue 的序向電路;以及一個呼叫 Queue module 的組合電路 ```scala= class queue_test(length: Int) extends Module { val io = IO(new Bundle { val in = Flipped(Decoupled(Vec(length, UInt(8.W)))) val out = Decoupled(Vec(length, UInt(8.W))) }) val queue = Queue(io.in, length) io.out <> queue } println(getVerilog(new queue_test(2))) ``` ```mips= module Queue( input clock, input reset, output io_enq_ready, input io_enq_valid, input [7:0] io_enq_bits_0, input [7:0] io_enq_bits_1, input io_deq_ready, output io_deq_valid, output [7:0] io_deq_bits_0, output [7:0] io_deq_bits_1 ); `ifdef RANDOMIZE_MEM_INIT reg [31:0] _RAND_0; reg [31:0] _RAND_1; `endif // RANDOMIZE_MEM_INIT `ifdef RANDOMIZE_REG_INIT reg [31:0] _RAND_2; reg [31:0] _RAND_3; reg [31:0] _RAND_4; `endif // RANDOMIZE_REG_INIT reg [7:0] ram_0 [0:1]; // @[Decoupled.scala 218:16] wire [7:0] ram_0_io_deq_bits_MPORT_data; // @[Decoupled.scala 218:16] wire ram_0_io_deq_bits_MPORT_addr; // @[Decoupled.scala 218:16] wire [7:0] ram_0_MPORT_data; // @[Decoupled.scala 218:16] wire ram_0_MPORT_addr; // @[Decoupled.scala 218:16] wire ram_0_MPORT_mask; // @[Decoupled.scala 218:16] wire ram_0_MPORT_en; // @[Decoupled.scala 218:16] reg [7:0] ram_1 [0:1]; // @[Decoupled.scala 218:16] wire [7:0] ram_1_io_deq_bits_MPORT_data; // @[Decoupled.scala 218:16] wire ram_1_io_deq_bits_MPORT_addr; // 
@[Decoupled.scala 218:16] wire [7:0] ram_1_MPORT_data; // @[Decoupled.scala 218:16] wire ram_1_MPORT_addr; // @[Decoupled.scala 218:16] wire ram_1_MPORT_mask; // @[Decoupled.scala 218:16] wire ram_1_MPORT_en; // @[Decoupled.scala 218:16] reg value; // @[Counter.scala 60:40] reg value_1; // @[Counter.scala 60:40] reg maybe_full; // @[Decoupled.scala 221:27] wire ptr_match = value == value_1; // @[Decoupled.scala 223:33] wire empty = ptr_match & ~maybe_full; // @[Decoupled.scala 224:25] wire full = ptr_match & maybe_full; // @[Decoupled.scala 225:24] wire do_enq = io_enq_ready & io_enq_valid; // @[Decoupled.scala 40:37] wire do_deq = io_deq_ready & io_deq_valid; // @[Decoupled.scala 40:37] assign ram_0_io_deq_bits_MPORT_addr = value_1; assign ram_0_io_deq_bits_MPORT_data = ram_0[ram_0_io_deq_bits_MPORT_addr]; // @[Decoupled.scala 218:16] assign ram_0_MPORT_data = io_enq_bits_0; assign ram_0_MPORT_addr = value; assign ram_0_MPORT_mask = 1'h1; assign ram_0_MPORT_en = io_enq_ready & io_enq_valid; assign ram_1_io_deq_bits_MPORT_addr = value_1; assign ram_1_io_deq_bits_MPORT_data = ram_1[ram_1_io_deq_bits_MPORT_addr]; // @[Decoupled.scala 218:16] assign ram_1_MPORT_data = io_enq_bits_1; assign ram_1_MPORT_addr = value; assign ram_1_MPORT_mask = 1'h1; assign ram_1_MPORT_en = io_enq_ready & io_enq_valid; assign io_enq_ready = ~full; // @[Decoupled.scala 241:19] assign io_deq_valid = ~empty; // @[Decoupled.scala 240:19] assign io_deq_bits_0 = ram_0_io_deq_bits_MPORT_data; // @[Decoupled.scala 242:15] assign io_deq_bits_1 = ram_1_io_deq_bits_MPORT_data; // @[Decoupled.scala 242:15] always @(posedge clock) begin if(ram_0_MPORT_en & ram_0_MPORT_mask) begin ram_0[ram_0_MPORT_addr] <= ram_0_MPORT_data; // @[Decoupled.scala 218:16] end if(ram_1_MPORT_en & ram_1_MPORT_mask) begin ram_1[ram_1_MPORT_addr] <= ram_1_MPORT_data; // @[Decoupled.scala 218:16] end if (reset) begin // @[Counter.scala 60:40] value <= 1'h0; // @[Counter.scala 60:40] end else if (do_enq) begin // 
@[Decoupled.scala 229:17] value <= value + 1'h1; // @[Counter.scala 76:15] end if (reset) begin // @[Counter.scala 60:40] value_1 <= 1'h0; // @[Counter.scala 60:40] end else if (do_deq) begin // @[Decoupled.scala 233:17] value_1 <= value_1 + 1'h1; // @[Counter.scala 76:15] end if (reset) begin // @[Decoupled.scala 221:27] maybe_full <= 1'h0; // @[Decoupled.scala 221:27] end else if (do_enq != do_deq) begin // @[Decoupled.scala 236:28] maybe_full <= do_enq; // @[Decoupled.scala 237:16] end end endmodule ``` ```mips= module QD_test( input clock, input reset, output io_in_ready, input io_in_valid, input [7:0] io_in_bits, input io_out_ready, output io_out_valid, output [7:0] io_out_bits ); wire queue_clock; // @[Decoupled.scala 296:21] wire queue_reset; // @[Decoupled.scala 296:21] wire queue_io_enq_ready; // @[Decoupled.scala 296:21] wire queue_io_enq_valid; // @[Decoupled.scala 296:21] wire [7:0] queue_io_enq_bits; // @[Decoupled.scala 296:21] wire queue_io_deq_ready; // @[Decoupled.scala 296:21] wire queue_io_deq_valid; // @[Decoupled.scala 296:21] wire [7:0] queue_io_deq_bits; // @[Decoupled.scala 296:21] Queue queue ( // @[Decoupled.scala 296:21] .clock(queue_clock), .reset(queue_reset), .io_enq_ready(queue_io_enq_ready), .io_enq_valid(queue_io_enq_valid), .io_enq_bits(queue_io_enq_bits), .io_deq_ready(queue_io_deq_ready), .io_deq_valid(queue_io_deq_valid), .io_deq_bits(queue_io_deq_bits) ); assign io_in_ready = queue_io_enq_ready; // @[Decoupled.scala 299:17] assign io_out_valid = queue_io_deq_valid; // @[cmd26.sc 8:12] assign io_out_bits = queue_io_deq_bits; // @[cmd26.sc 8:12] assign queue_clock = clock; assign queue_reset = reset; assign queue_io_enq_valid = io_in_valid; // @[Decoupled.scala 297:22] assign queue_io_enq_bits = io_in_bits; // @[Decoupled.scala 298:21] assign queue_io_deq_ready = io_out_ready; // @[cmd26.sc 8:12] endmodule ``` ## Priority arbiter * 當硬體有多個生產者、多個消費者,就需要 arbiter 進行仲裁 * 實際例子: AMBA ![image](https://hackmd.io/_uploads/ryTSrMJDT.png) * 
arbiter 在 chisel 分兩種: * Priority: 優先考慮優先權較高的生產者(數字越小優先權越高)(範例有接續說明) * RRArbiter: 按循環順序運行 * 透過組合電路實現的 * 內部使用 ArbiterIO 接口,而 ArbiterIO 又是由 DecoupledIO 實現 * 範例: 從多個輸入中藉由優先級決定輸出 * 從範例可以推論 1. 基本就是哪個生產者的 valid 為高(有資料要送)就選誰 2. 如果同時有多個生產者的 valid 為高,那就選順序較前者,因此仲裁者的輸入順序會影響優先權 ```scala= class arbiter_test(length: Int) extends Module { val io = IO(new Bundle { val in = Flipped(Vec(length, Decoupled(UInt(8.W)))) val out = Decoupled(UInt(8.W)) }) val arbiter = Module(new Arbiter(UInt(8.W), length)) arbiter.io.in <> io.in io.out <> arbiter.io.out } println(getVerilog(new arbiter_test(2))) ``` ```mips= module Arbiter( output io_in_0_ready, input io_in_0_valid, input [7:0] io_in_0_bits, output io_in_1_ready, input io_in_1_valid, input [7:0] io_in_1_bits, input io_out_ready, output io_out_valid, output [7:0] io_out_bits ); wire grant_1 = ~io_in_0_valid; // @[Arbiter.scala 31:78] assign io_in_0_ready = io_out_ready; // @[Arbiter.scala 134:19] assign io_in_1_ready = grant_1 & io_out_ready; // @[Arbiter.scala 134:19] assign io_out_valid = ~grant_1 | io_in_1_valid; // @[Arbiter.scala 135:31] assign io_out_bits = io_in_0_valid ? 
io_in_0_bits : io_in_1_bits; // @[Arbiter.scala 126:27 Arbiter.scala 128:19 Arbiter.scala 124:15] endmodule ``` ```mips= module arbiter_test( input clock, input reset, output io_in_0_ready, input io_in_0_valid, input [7:0] io_in_0_bits, output io_in_1_ready, input io_in_1_valid, input [7:0] io_in_1_bits, input io_out_ready, output io_out_valid, output [7:0] io_out_bits ); wire arbiter_io_in_0_ready; // @[cmd30.sc 6:25] wire arbiter_io_in_0_valid; // @[cmd30.sc 6:25] wire [7:0] arbiter_io_in_0_bits; // @[cmd30.sc 6:25] wire arbiter_io_in_1_ready; // @[cmd30.sc 6:25] wire arbiter_io_in_1_valid; // @[cmd30.sc 6:25] wire [7:0] arbiter_io_in_1_bits; // @[cmd30.sc 6:25] wire arbiter_io_out_ready; // @[cmd30.sc 6:25] wire arbiter_io_out_valid; // @[cmd30.sc 6:25] wire [7:0] arbiter_io_out_bits; // @[cmd30.sc 6:25] Arbiter arbiter ( // @[cmd30.sc 6:25] .io_in_0_ready(arbiter_io_in_0_ready), .io_in_0_valid(arbiter_io_in_0_valid), .io_in_0_bits(arbiter_io_in_0_bits), .io_in_1_ready(arbiter_io_in_1_ready), .io_in_1_valid(arbiter_io_in_1_valid), .io_in_1_bits(arbiter_io_in_1_bits), .io_out_ready(arbiter_io_out_ready), .io_out_valid(arbiter_io_out_valid), .io_out_bits(arbiter_io_out_bits) ); assign io_in_0_ready = arbiter_io_in_0_ready; // @[cmd30.sc 7:19] assign io_in_1_ready = arbiter_io_in_1_ready; // @[cmd30.sc 7:19] assign io_out_valid = arbiter_io_out_valid; // @[cmd30.sc 8:12] assign io_out_bits = arbiter_io_out_bits; // @[cmd30.sc 8:12] assign arbiter_io_in_0_valid = io_in_0_valid; // @[cmd30.sc 7:19] assign arbiter_io_in_0_bits = io_in_0_bits; // @[cmd30.sc 7:19] assign arbiter_io_in_1_valid = io_in_1_valid; // @[cmd30.sc 7:19] assign arbiter_io_in_1_bits = io_in_1_bits; // @[cmd30.sc 7:19] assign arbiter_io_out_ready = io_out_ready; // @[cmd30.sc 8:12] endmodule ``` ## Bitwise 操作 ### PopCount(chisel 硬體數值) * 以 UInt 形式傳回輸入中為 1 的位元的數量 * 例如: * in = 0b11001010 * out = 4 ### Reverse(chisel 硬體數值) * 傳回輸入的位元順序反轉(最高位與最低位對調,不是逐位元取反) * 例如: * in = 0b11001010 * out = 0b01010011 ## OneHot encoding * 
一種整數編碼,該位元組或向量裏僅容許其中一位爲 1,其他位都必須爲 0 ### UIntToOH(chisel 硬體數值) * ```UInt``` 到 ```OneHot``` ### OHToUInt(chisel 硬體數值) * ```OneHot``` 到 ```UInt``` ## Mux ### PriorityMux(select: Seq[Bool], value: Seq[Data]) * 依 select 的順序,輸出第一個 select 為高電平所對應的輸入(允許同時有多個 select 為高電平) * 下例是 10 個輸入、每個輸入寬度為 7 bits 的多工器 ```scala PriorityMux(select, Vec(10, UInt(7.W))) ``` ### Mux1H(select: Seq[Bool], value: Seq[Data]) * 在保證選擇訊號中恰好有一個為高電平的前提下,提供更有效率的實現 * 如果 select 不為 OneHot 編碼,則行為未定義 ```scala Mux1H(select, Vec(10, UInt(7.W))) ``` ## Counter ### Counter(指定的限制值) * 每個週期可遞增一次,數到指定的限制值減一之後會歸零(wrap)重新計數 * 例如 Counter(3) * 其週期性的值為: 0, 1, 2, 0, 1, 2, 0 ... * 它不是模組,其值是可讀的 * 使用```counter.value```來讀計數器的值 * 使用```counter.inc()```來為計數器加一 * 好像只有辦法加 1 * 接續兩次```counter.inc()```並沒有辦法使計數器在一個時脈中加 2 * 範例: ```scala= class count extends Module { val io = IO(new Bundle { val count = Input(Bool()) val out = Output(UInt(2.W)) }) val counter = Counter(3) // 3-count Counter (outputs range [0...2]) when(io.count) { counter.inc() } io.out := counter.value } println(getVerilog(new count())) ``` ```mips= module count( input clock, input reset, input io_count, output [1:0] io_out ); `ifdef RANDOMIZE_REG_INIT reg [31:0] _RAND_0; `endif // RANDOMIZE_REG_INIT reg [1:0] value; // @[Counter.scala 60:40] wire wrap = value == 2'h2; // @[Counter.scala 72:24] wire [1:0] _value_T_1 = value + 2'h1; // @[Counter.scala 76:24] assign io_out = value; // @[cmd40.sc 11:12] always @(posedge clock) begin if (reset) begin // @[Counter.scala 60:40] value <= 2'h0; // @[Counter.scala 60:40] end else if (io_count) begin // @[cmd40.sc 8:20] if (wrap) begin // @[Counter.scala 86:20] value <= 2'h0; // @[Counter.scala 86:28] end else begin value <= _value_T_1; // @[Counter.scala 76:15] end end end endmodule ``` --- # 高階函數 * 指以函數作為參數的函數 * map 和 reduce 都是高階函數 * 當需要元組解包時,使用 case 語句,如 ```case (a, b) => a * b``` * 元組內只有單一個元素時也可以使用 ## chisel 設計者的核心思想 * 連 for 都覺得他冗長(我是覺得沒必要啦~) * 考慮兩個等價的 FIR 程式碼 ```scala= val muls = Wire(Vec(length, UInt(8.W))) for(i <- 0 until length) { if(i == 0) muls(i) := io.in * io.consts(i) else muls(i) := regs(i - 1) * io.consts(i) } val scan = 
Wire(Vec(length, UInt(8.W))) for(i <- 0 until length) { if(i == 0) scan(i) := muls(i) else scan(i) := muls(i) + scan(i - 1) } io.out := scan(length - 1) ``` ```scala= io.out := (taps zip io.consts).map { case (a, b) => a * b }.reduce(_ + _) ``` * 上面程式碼的解析 * taps: 假設他是取樣點的 list,意即 * ```taps(0) = io.in``` * ```taps(1) = regs(0)``` * (taps zip io.consts): 下面有解釋 * .map(): 高階函數,將給定的函數套用到集合中的每個元素,並回傳由結果組成的新集合(注意這裡不是鍵值對的 Map 資料結構) * 可以設計映射的規則 * .reduce(): 參考[【1】Scala 和 Chisel 語法簡記](https://hackmd.io/@nfUUgsYRTGy81y5d9AYOyg/BJdW9obUa) * 當 list 為空時,reduce 會拋出例外(UnsupportedOperationException) * zip: 雖然[【2】組合電路、序向電路和 Control Flow](https://hackmd.io/@nfUUgsYRTGy81y5d9AYOyg/HyWJBxmUa)有提到 zip 的用法,但這邊再更詳細解析一下 * 語法: ```def zip[B](that: GenIterable[B]): Iterable[(A, B)]``` * ```list_a zip list_b```和```list_a.zip(list_b)```都是可行的寫法 * 從範例中可以發現 zip 會受限於最短的那個 list ```scala= val list = List(1, 2, 3 ,4) val list1 = List("A", "B", "C") //apply operation to create a zip of list val list2 = list zip list1 val list3 = list1 zip list val list4 = list.zip(list1) val list5 = list1.zip(list) //print list println(list2) println(list3) println(list4) println(list5) ``` ```shell List((1,A), (2,B), (3,C)) List((A,1), (B,2), (C,3)) List((1,A), (2,B), (3,C)) List((A,1), (B,2), (C,3)) ``` ## zipWithIndex * 語法```zipWithIndex: List[(A, Int)]``` * 不帶參數,但傳回一個列表,其中每個元素都是原始元素和索引(第一個為零)的元組 ```scala= println(List("a", "b", "c", "d").zipWithIndex) ``` ```shell List((a,0), (b,1), (c,2), (d,3)) ``` ## Fold * 與 reduce 非常相似,只不過可以指定初始累加值 * 語法```fold(z: A)(op: (A, A) ⇒ A): A``` * 和 reduce 不同,它不會因空列表而失敗 ```scala= println(List(1, 2, 3, 4).fold(2)(_ * _)) ``` ```shell 48 ``` ## scan、reduce 和 fold 的其他成員 ### scanLeft/scanRight * 從一個初始值開始,有方向性的進行**累積**的 op 操作 * 回傳累積過程的集合 ### reduceLeft/reduceRight * 將列表內元素有方向性的進行 op 操作的聚合 ### foldLeft/foldRight * 指定初始累加值後,將列表內元素有方向性的進行 op 操作的聚合 --- # 自定義函數 ## 自定義函數 * 若要不產生輸出,請傳回 Unit 類型 * 就類似 C 的 void * Scala 中的函數是物件(object)。這意味著我們可以將一個函數分配給 val 並將其作為參數傳遞給類別(class)、物件或其他函數(def) * 所以創建 val 而不是 def,是因為使用 val 可以將該函數傳遞給其他函數 * ```def```和```val```的宣告和差別 ```scala 
// These are normal functions. def plus1funct(x: Int): Int = x + 1 def times2funct(x: Int): Int = x * 2 // These are functions as vals. // The first one explicitly specifies the return type. val plus1val: Int => Int = x => x + 1 val times2val = (x: Int) => x * 2 // Calling both looks the same. plus1funct(4) plus1val(4) plus1funct(x=4) //plus1val(x=4) // this doesn't work ``` ```shell defined function plus1funct defined function times2funct plus1val: Int => Int = ammonite.$sess.cmd7$Helper$$Lambda$2933/628001821@1e527155 times2val: Int => Int = ammonite.$sess.cmd7$Helper$$Lambda$2934/975951512@56eebcd1 res7_4: Int = 5 res7_5: Int = 5 res7_6: Int = 5 ``` * 使用 val 將函式作為物件傳遞的範例 * 很像 C 的函式指標的用法 ```scala= // create our function val plus1 = (x: Int) => x + 1 val times2 = (x: Int) => x * 2 // pass it to map, a list function val myList = List(1, 2, 5, 9) val myListPlus = myList.map(plus1) val myListTimes = myList.map(times2) // create a custom function, which performs an operation on X N times using recursion def opN(x: Int, n: Int, op: Int => Int): Int = { if (n <= 0) { x } else { opN(op(x), n-1, op) } } opN(7, 3, plus1) opN(7, 3, times2) ``` ```shell plus1: Int => Int = ammonite.$sess.cmd8$Helper$$Lambda$2972/1279130160@5c7f9e3f times2: Int => Int = ammonite.$sess.cmd8$Helper$$Lambda$2973/1888893016@eaa4a49 myList: List[Int] = List(1, 2, 5, 9) myListPlus: List[Int] = List(2, 3, 6, 10) myListTimes: List[Int] = List(2, 4, 10, 18) defined function opN res8_6: Int = 10 res8_7: Int = 56 ``` * C 的函式指標範例 ```cpp= typedef double (*F)(double, int); double power(double, int); double multiply(double, int); double divide(double, int); double (*funcArray[])(double, int) = { power, multiply, divide, }; double powerpower(double x, int n, F func) { return func(x,n); } void main(int argc, char *argv[]) { for(i; i<size; i++){ if( strcmp(argv[1], operation[i]) == 0){ ans = powerpower(x, n, funcArray[i]); break; } } } ``` ## 匿名函數 Anonymous Functions * 顧名思義,匿名函數是無名的 * 例如 val 
如果我們只使用一次,則無需為它建立變數名稱 a ```scala= val myList = List(5, 6, 7, 8) myList.map( (x:Int) => x + 1 ) myList.map(_ + 1) ``` ```shell myList: List[Int] = List(5, 6, 7, 8) res10_1: List[Int] = List(6, 7, 8, 9) res10_2: List[Int] = List(6, 7, 8, 9) ``` ## Question * 當使用不帶參數的函數時,可能會出現令人困惑的情況 * 因為每次傳遞都是重新呼叫函數 * 因此下面範例的值才會變動 ```scala= import scala.util.Random // both x and y call the nextInt function, but x is evaluated immediately and y is a function val x = Random.nextInt def y = Random.nextInt // x was previously evaluated, so it is a constant println(s"x = $x") println(s"x = $x") // y is a function and gets reevaluated at each call, thus these produce different results println(s"y = $y") println(s"y = $y") ``` ```shell x = 1353115134 x = 1353115134 y = 1624387838 y = -867619323 ``` ## Chisel 中的函數式程式設計 ### 範例1 FIR ```scala= // get some math functions import scala.math.{abs, round, cos, Pi, pow} // simple triangular window val TriangularWindow: (Int, Int) => Seq[Int] = (length, bitwidth) => { val raw_coeffs = (0 until length).map( (x:Int) => 1-abs((x.toDouble-(length-1)/2.0)/((length-1)/2.0)) ) val scaled_coeffs = raw_coeffs.map( (x: Double) => round(x * pow(2, bitwidth)).toInt) scaled_coeffs } // Hamming window val HammingWindow: (Int, Int) => Seq[Int] = (length, bitwidth) => { val raw_coeffs = (0 until length).map( (x: Int) => 0.54 - 0.46*cos(2*Pi*x/(length-1))) val scaled_coeffs = raw_coeffs.map( (x: Double) => round(x * pow(2, bitwidth)).toInt) scaled_coeffs } class MyFir(length: Int, bitwidth: Int, window: (Int, Int) => Seq[Int]) extends Module { val io = IO(new Bundle { val in = Input(UInt(bitwidth.W)) val out = Output(UInt((bitwidth*2+length-1).W)) // expect bit growth, conservative but lazy }) // calculate the coefficients using the provided window function, mapping to UInts val coeffs = window(length, bitwidth).map(_.U) // create an array holding the output of the delays // note: we avoid using a Vec here since we don't need dynamic indexing val delays = 
Seq.fill(length)(Wire(UInt(bitwidth.W))).scan(io.in)( (prev: UInt, next: UInt) => { next := RegNext(prev) next }) // multiply, putting result in "mults" val mults = delays.zip(coeffs).map{ case(delay: UInt, coeff: UInt) => delay * coeff } // add up multiplier outputs with bit growth val result = mults.reduce(_+&_) // connect output io.out := result } visualize(() => new MyFir(7, 12, TriangularWindow)) println(getVerilog(new MyFir(7, 12, TriangularWindow))) ``` ```mips= module MyFir( input clock, input reset, input [11:0] io_in, output [29:0] io_out ); `ifdef RANDOMIZE_REG_INIT reg [31:0] _RAND_0; reg [31:0] _RAND_1; reg [31:0] _RAND_2; reg [31:0] _RAND_3; reg [31:0] _RAND_4; reg [31:0] _RAND_5; `endif // RANDOMIZE_REG_INIT reg [11:0] REG; // @[cmd9.sc 13:20] reg [11:0] REG_1; // @[cmd9.sc 13:20] reg [11:0] REG_2; // @[cmd9.sc 13:20] reg [11:0] REG_3; // @[cmd9.sc 13:20] reg [11:0] REG_4; // @[cmd9.sc 13:20] reg [11:0] REG_5; // @[cmd9.sc 13:20] wire [12:0] mults_0 = io_in * 1'h0; // @[cmd9.sc 18:79] wire [22:0] mults_1 = REG * 11'h555; // @[cmd9.sc 18:79] wire [23:0] mults_2 = REG_1 * 12'haab; // @[cmd9.sc 18:79] wire [24:0] mults_3 = REG_2 * 13'h1000; // @[cmd9.sc 18:79] wire [23:0] mults_4 = REG_3 * 12'haab; // @[cmd9.sc 18:79] wire [22:0] mults_5 = REG_4 * 11'h555; // @[cmd9.sc 18:79] wire [12:0] mults_6 = REG_5 * 1'h0; // @[cmd9.sc 18:79] wire [22:0] _GEN_0 = {{10'd0}, mults_0}; // @[cmd9.sc 21:30] wire [23:0] _T = _GEN_0 + mults_1; // @[cmd9.sc 21:30] wire [24:0] _T_1 = _T + mults_2; // @[cmd9.sc 21:30] wire [25:0] _T_2 = _T_1 + mults_3; // @[cmd9.sc 21:30] wire [25:0] _GEN_1 = {{2'd0}, mults_4}; // @[cmd9.sc 21:30] wire [26:0] _T_3 = _T_2 + _GEN_1; // @[cmd9.sc 21:30] wire [26:0] _GEN_2 = {{4'd0}, mults_5}; // @[cmd9.sc 21:30] wire [27:0] _T_4 = _T_3 + _GEN_2; // @[cmd9.sc 21:30] wire [27:0] _GEN_3 = {{15'd0}, mults_6}; // @[cmd9.sc 21:30] wire [28:0] result = _T_4 + _GEN_3; // @[cmd9.sc 21:30] assign io_out = {{1'd0}, result}; // @[cmd9.sc 21:30] always 
@(posedge clock) begin REG <= io_in; // @[cmd9.sc 13:20] REG_1 <= REG; // @[cmd9.sc 12:37 cmd9.sc 13:10] REG_2 <= REG_1; // @[cmd9.sc 12:37 cmd9.sc 13:10] REG_3 <= REG_2; // @[cmd9.sc 12:37 cmd9.sc 13:10] REG_4 <= REG_3; // @[cmd9.sc 12:37 cmd9.sc 13:10] REG_5 <= REG_4; // @[cmd9.sc 12:37 cmd9.sc 13:10] end endmodule ``` ![image](https://hackmd.io/_uploads/BJXmUsHPT.png) ### 範例2 類神經網路 ```scala= val Step: FixedPoint => FixedPoint = x => Mux(x <= 0.F(8.BP), 0.F(8.BP), 1.F(8.BP)) val ReLU: FixedPoint => FixedPoint = x => Mux(x <= 0.F(8.BP), 0.F(8.BP), x) class Neuron(inputs: Int, act: FixedPoint => FixedPoint) extends Module { val io = IO(new Bundle { val in = Input(Vec(inputs, FixedPoint(16.W, 8.BP))) val weights = Input(Vec(inputs, FixedPoint(16.W, 8.BP))) val out = Output(FixedPoint(16.W, 8.BP)) }) val mac = io.in.zip(io.weights).map{ case(a:FixedPoint, b:FixedPoint) => a*b}.reduce(_+_) io.out := act(mac) } println(getVerilog(new Neuron(2, Step))) println(getVerilog(new Neuron(2, ReLU))) ``` ```mips= // Step module Neuron( input clock, input reset, input [15:0] io_in_0, input [15:0] io_in_1, input [15:0] io_weights_0, input [15:0] io_weights_1, output [15:0] io_out ); wire [31:0] _T = $signed(io_in_0) * $signed(io_weights_0); // @[cmd12.sc 8:79] wire [31:0] _T_1 = $signed(io_in_1) * $signed(io_weights_1); // @[cmd12.sc 8:79] wire [31:0] mac = $signed(_T) + $signed(_T_1); // @[cmd12.sc 8:91] wire [9:0] _T_5 = $signed(mac) <= 32'sh0 ? 
$signed(10'sh0) : $signed(10'sh100); // @[cmd13.sc 1:46] assign io_out = {{6{_T_5[9]}},_T_5}; // @[cmd13.sc 1:46] endmodule ``` * 注意: 下面標示為 ReLU 的輸出與上面 Step 的輸出完全相同(仍是在 0 與常數 `10'sh100`(即 1.0)之間選擇),疑似貼錯;依 ReLU 的定義 `Mux(x <= 0.F(8.BP), 0.F(8.BP), x)`,mac 大於 0 時應輸出 mac 本身,此段輸出需重新產生 ```mips= // ReLU module Neuron( input clock, input reset, input [15:0] io_in_0, input [15:0] io_in_1, input [15:0] io_weights_0, input [15:0] io_weights_1, output [15:0] io_out ); wire [31:0] _T = $signed(io_in_0) * $signed(io_weights_0); // @[cmd12.sc 8:79] wire [31:0] _T_1 = $signed(io_in_1) * $signed(io_weights_1); // @[cmd12.sc 8:79] wire [31:0] mac = $signed(_T) + $signed(_T_1); // @[cmd12.sc 8:91] wire [9:0] _T_5 = $signed(mac) <= 32'sh0 ? $signed(10'sh0) : $signed(10'sh100); // @[cmd13.sc 1:46] assign io_out = {{6{_T_5[9]}},_T_5}; // @[cmd13.sc 1:46] endmodule ``` --- # 參考 * [Chisel-bootcamp](https://mybinder.org/v2/gh/freechipsproject/chisel-bootcamp/master) * [Decoupled 用法](https://blog.csdn.net/PP_forever/article/details/95977959) * [Arbiter 介紹](https://blog.csdn.net/qq_39507748/article/details/118887707) * [交大的講義](http://twins.ee.nctu.edu.tw/courses/embedlab_11/lecture/AMBA.pdf)