Try   HackMD

【4】高階函式與設計

contributed by AgainTW


章節


Outline

  1. Chisel Standard Library
  2. 高階函數
  3. 自定義函數

名詞解釋

  • backpressure mechanism: 處理資料產生速度快於資料消耗速度的機制
  • AMBA: Advanced Micro-controller Bus Architecture,高級微控制器匯流排架構
    • 一種開放標準片上互連規範,用於連接和管理系統上的功能塊
    • 是用於 ARM 架構下系統晶片(SoC)設計中的一種匯流排架構,由安謀國際科技於 1996 年開發
    • 促進一次成功開發具有一個或多個 CPU、GPU 或訊號處理器的嵌入式微控制器產品,
    • 技術獨立,允許在不同的 IC 製程中重複使用 IP 核心、週邊和系統宏單元,
    • 鼓勵模組化系統設計以提高處理器獨立性,並開發可重複使用的周邊和系統 IP 庫
    • 最大限度地減少矽基礎設施,同時支援高性能和低功耗片上通訊

Chisel Standard Library

  • 目的: 為常用硬體模組提供標準介面庫(鼓勵 RTL 的互通性),例如 AXI4 介面

DecoupledIO

  • 提供了雙向的流量控制機制,包括 backpressure mechanism
  • 資料類型是可配置的
  • ready 和 valid 不耦合
    • ready 僅取決於接收器是否能夠接收數據
    • valid 僅取決於來源是否有數據
    • ready 和 valid 耦合可能會導致無法合成(unsynthesizable)的組合電路迴圈
  • 說明:
    • 發送端
      • bits: 發送端資料線
      • valid: 資料準備好時,拉高電位
    • 接收端:
      • ready: 準備好接收資料時,拉高電位
    • 當 valid 和 ready 都拉高電位,進行一次傳輸
    • 在事務處理之後(在下一個時鐘週期),值才會更新
  • 語法
// The payload can be any Chisel data type, such as Bool(), SInt(...), or even custom Bundles.
val myChiselData = UInt(8.W)

// Wrap a previously declared data type ...
val myDecoupled_1 = Decoupled(myChiselData)
// ... or describe the payload type inline.
val myDecoupled_2 = Decoupled(UInt(8.W))
  • 範例
    • 沒啥實質用途,就是展示一下方向關係
/** Pass-through module whose only purpose is to show the port directions of a
  * Decoupled interface and of its Flipped counterpart.
  *
  * NOTE: this wiring drives `valid` from `ready` (and `ready` from `valid`), i.e. it
  * couples the two handshake signals. That is acceptable for a direction demo, but it
  * is exactly the pattern that should be avoided in a real design.
  */
class Decoupled_test extends Module {
  val io = IO(new Bundle {
    // Receiving side: Flipped reverses the directions, so valid/bits are inputs
    // and ready is an output of this module.
    val in = Flipped(Decoupled(UInt(8.W)))
    // Sending side: valid/bits are outputs and ready is an input of this module.
    val out = Decoupled(UInt(8.W))
  })

  io.out.valid := io.out.ready
  io.in.ready := io.in.valid
  io.out.bits := io.in.bits
}

println(getVerilog(new Decoupled_test))
module Decoupled_test( input clock, input reset, output io_in_ready, input io_in_valid, input [7:0] io_in_bits, input io_out_ready, output io_out_valid, output [7:0] io_out_bits ); assign io_in_ready = io_in_valid; // @[cmd23.sc 7:17] assign io_out_valid = io_out_ready; // @[cmd23.sc 6:18] assign io_out_bits = io_in_bits; // @[cmd23.sc 8:17] endmodule

Queue

  • 建立一個 FIFO 佇列
  • 資料類型和元素數量都是可配置的
  • 兩側具有解耦接口
    • 所以需要搭配解耦(Decoupled)接口使用
  • 允許背壓
  • <>: "【2】組合電路、序向電路和 Control Flow"有提到
  • 範例:
    • 可以發現使用 Queue 會產生一個 Queue 的序向電路;以及一個呼叫 Queue module 的組合電路
/** Places a FIFO queue between a Decoupled input and a Decoupled output.
  *
  * @param length used twice: as the number of 8-bit lanes in the Vec payload and as
  *               the number of entries (depth) of the queue
  */
class queue_test(length: Int) extends Module {
  val io = IO(new Bundle {
    val in = Flipped(Decoupled(Vec(length, UInt(8.W))))
    val out = Decoupled(Vec(length, UInt(8.W)))
  })

  // Queue(enq, entries) instantiates the queue module, connects io.in to its
  // enqueue port and returns the dequeue-side Decoupled interface.
  val queue = Queue(io.in, length)
  // Bulk-connect the dequeue side to the module output.
  io.out <> queue
}

println(getVerilog(new queue_test(2)))
module Queue( input clock, input reset, output io_enq_ready, input io_enq_valid, input [7:0] io_enq_bits_0, input [7:0] io_enq_bits_1, input io_deq_ready, output io_deq_valid, output [7:0] io_deq_bits_0, output [7:0] io_deq_bits_1 ); `ifdef RANDOMIZE_MEM_INIT reg [31:0] _RAND_0; reg [31:0] _RAND_1; `endif // RANDOMIZE_MEM_INIT `ifdef RANDOMIZE_REG_INIT reg [31:0] _RAND_2; reg [31:0] _RAND_3; reg [31:0] _RAND_4; `endif // RANDOMIZE_REG_INIT reg [7:0] ram_0 [0:1]; // @[Decoupled.scala 218:16] wire [7:0] ram_0_io_deq_bits_MPORT_data; // @[Decoupled.scala 218:16] wire ram_0_io_deq_bits_MPORT_addr; // @[Decoupled.scala 218:16] wire [7:0] ram_0_MPORT_data; // @[Decoupled.scala 218:16] wire ram_0_MPORT_addr; // @[Decoupled.scala 218:16] wire ram_0_MPORT_mask; // @[Decoupled.scala 218:16] wire ram_0_MPORT_en; // @[Decoupled.scala 218:16] reg [7:0] ram_1 [0:1]; // @[Decoupled.scala 218:16] wire [7:0] ram_1_io_deq_bits_MPORT_data; // @[Decoupled.scala 218:16] wire ram_1_io_deq_bits_MPORT_addr; // @[Decoupled.scala 218:16] wire [7:0] ram_1_MPORT_data; // @[Decoupled.scala 218:16] wire ram_1_MPORT_addr; // @[Decoupled.scala 218:16] wire ram_1_MPORT_mask; // @[Decoupled.scala 218:16] wire ram_1_MPORT_en; // @[Decoupled.scala 218:16] reg value; // @[Counter.scala 60:40] reg value_1; // @[Counter.scala 60:40] reg maybe_full; // @[Decoupled.scala 221:27] wire ptr_match = value == value_1; // @[Decoupled.scala 223:33] wire empty = ptr_match & ~maybe_full; // @[Decoupled.scala 224:25] wire full = ptr_match & maybe_full; // @[Decoupled.scala 225:24] wire do_enq = io_enq_ready & io_enq_valid; // @[Decoupled.scala 40:37] wire do_deq = io_deq_ready & io_deq_valid; // @[Decoupled.scala 40:37] assign ram_0_io_deq_bits_MPORT_addr = value_1; assign ram_0_io_deq_bits_MPORT_data = ram_0[ram_0_io_deq_bits_MPORT_addr]; // @[Decoupled.scala 218:16] assign ram_0_MPORT_data = io_enq_bits_0; assign ram_0_MPORT_addr = value; assign ram_0_MPORT_mask = 1'h1; assign ram_0_MPORT_en = io_enq_ready & 
io_enq_valid; assign ram_1_io_deq_bits_MPORT_addr = value_1; assign ram_1_io_deq_bits_MPORT_data = ram_1[ram_1_io_deq_bits_MPORT_addr]; // @[Decoupled.scala 218:16] assign ram_1_MPORT_data = io_enq_bits_1; assign ram_1_MPORT_addr = value; assign ram_1_MPORT_mask = 1'h1; assign ram_1_MPORT_en = io_enq_ready & io_enq_valid; assign io_enq_ready = ~full; // @[Decoupled.scala 241:19] assign io_deq_valid = ~empty; // @[Decoupled.scala 240:19] assign io_deq_bits_0 = ram_0_io_deq_bits_MPORT_data; // @[Decoupled.scala 242:15] assign io_deq_bits_1 = ram_1_io_deq_bits_MPORT_data; // @[Decoupled.scala 242:15] always @(posedge clock) begin if(ram_0_MPORT_en & ram_0_MPORT_mask) begin ram_0[ram_0_MPORT_addr] <= ram_0_MPORT_data; // @[Decoupled.scala 218:16] end if(ram_1_MPORT_en & ram_1_MPORT_mask) begin ram_1[ram_1_MPORT_addr] <= ram_1_MPORT_data; // @[Decoupled.scala 218:16] end if (reset) begin // @[Counter.scala 60:40] value <= 1'h0; // @[Counter.scala 60:40] end else if (do_enq) begin // @[Decoupled.scala 229:17] value <= value + 1'h1; // @[Counter.scala 76:15] end if (reset) begin // @[Counter.scala 60:40] value_1 <= 1'h0; // @[Counter.scala 60:40] end else if (do_deq) begin // @[Decoupled.scala 233:17] value_1 <= value_1 + 1'h1; // @[Counter.scala 76:15] end if (reset) begin // @[Decoupled.scala 221:27] maybe_full <= 1'h0; // @[Decoupled.scala 221:27] end else if (do_enq != do_deq) begin // @[Decoupled.scala 236:28] maybe_full <= do_enq; // @[Decoupled.scala 237:16] end end endmodule
module QD_test( input clock, input reset, output io_in_ready, input io_in_valid, input [7:0] io_in_bits, input io_out_ready, output io_out_valid, output [7:0] io_out_bits ); wire queue_clock; // @[Decoupled.scala 296:21] wire queue_reset; // @[Decoupled.scala 296:21] wire queue_io_enq_ready; // @[Decoupled.scala 296:21] wire queue_io_enq_valid; // @[Decoupled.scala 296:21] wire [7:0] queue_io_enq_bits; // @[Decoupled.scala 296:21] wire queue_io_deq_ready; // @[Decoupled.scala 296:21] wire queue_io_deq_valid; // @[Decoupled.scala 296:21] wire [7:0] queue_io_deq_bits; // @[Decoupled.scala 296:21] Queue queue ( // @[Decoupled.scala 296:21] .clock(queue_clock), .reset(queue_reset), .io_enq_ready(queue_io_enq_ready), .io_enq_valid(queue_io_enq_valid), .io_enq_bits(queue_io_enq_bits), .io_deq_ready(queue_io_deq_ready), .io_deq_valid(queue_io_deq_valid), .io_deq_bits(queue_io_deq_bits) ); assign io_in_ready = queue_io_enq_ready; // @[Decoupled.scala 299:17] assign io_out_valid = queue_io_deq_valid; // @[cmd26.sc 8:12] assign io_out_bits = queue_io_deq_bits; // @[cmd26.sc 8:12] assign queue_clock = clock; assign queue_reset = reset; assign queue_io_enq_valid = io_in_valid; // @[Decoupled.scala 297:22] assign queue_io_enq_bits = io_in_bits; // @[Decoupled.scala 298:21] assign queue_io_deq_ready = io_out_ready; // @[cmd26.sc 8:12] endmodule

Priority arbiter

  • 當硬體有多個生產者、多個消費者,就需要 arbiter 進行仲裁
  • 實際例子: AMBA
    image
  • arbiter 在 chisel 分兩種:
    • Priority: 優先考慮優先權較高的生產者(數字越小優先權越高)(範例有接續說明)
    • RRArbiter: 按循環順序運行
  • 透過組合電路實現的
  • 內部使用 ArbiterIO 接口,而 ArbiterIO 又是由 DecoupledIO 實現
  • 範例: 從多個輸入中藉由優先級決定輸出
    • 從範例可以推論
      1. 基本就是哪個生產者的 valid 拉高(有資料要送)就選誰
      2. 如果同時有多個生產者的 valid 拉高,就選順序較前者,因此仲裁者的輸入順序會影響優先權
/** Wraps a priority Arbiter that selects one of `length` Decoupled producers.
  *
  * The input with the lowest index wins whenever several inputs are valid at the
  * same time, so the order of `io.in` determines the priority.
  *
  * @param length number of producers competing for the single output
  */
class arbiter_test(length: Int) extends Module {
  val io = IO(new Bundle {
    val in = Flipped(Vec(length, Decoupled(UInt(8.W))))
    val out = Decoupled(UInt(8.W))
  })

  val arbiter = Module(new Arbiter(UInt(8.W), length))
  // Bulk-connect all producers to the arbiter, and the arbiter output to the consumer.
  arbiter.io.in <> io.in
  io.out <> arbiter.io.out
}

println(getVerilog(new arbiter_test(2)))
module Arbiter( output io_in_0_ready, input io_in_0_valid, input [7:0] io_in_0_bits, output io_in_1_ready, input io_in_1_valid, input [7:0] io_in_1_bits, input io_out_ready, output io_out_valid, output [7:0] io_out_bits ); wire grant_1 = ~io_in_0_valid; // @[Arbiter.scala 31:78] assign io_in_0_ready = io_out_ready; // @[Arbiter.scala 134:19] assign io_in_1_ready = grant_1 & io_out_ready; // @[Arbiter.scala 134:19] assign io_out_valid = ~grant_1 | io_in_1_valid; // @[Arbiter.scala 135:31] assign io_out_bits = io_in_0_valid ? io_in_0_bits : io_in_1_bits; // @[Arbiter.scala 126:27 Arbiter.scala 128:19 Arbiter.scala 124:15] endmodule
module arbiter_test( input clock, input reset, output io_in_0_ready, input io_in_0_valid, input [7:0] io_in_0_bits, output io_in_1_ready, input io_in_1_valid, input [7:0] io_in_1_bits, input io_out_ready, output io_out_valid, output [7:0] io_out_bits ); wire arbiter_io_in_0_ready; // @[cmd30.sc 6:25] wire arbiter_io_in_0_valid; // @[cmd30.sc 6:25] wire [7:0] arbiter_io_in_0_bits; // @[cmd30.sc 6:25] wire arbiter_io_in_1_ready; // @[cmd30.sc 6:25] wire arbiter_io_in_1_valid; // @[cmd30.sc 6:25] wire [7:0] arbiter_io_in_1_bits; // @[cmd30.sc 6:25] wire arbiter_io_out_ready; // @[cmd30.sc 6:25] wire arbiter_io_out_valid; // @[cmd30.sc 6:25] wire [7:0] arbiter_io_out_bits; // @[cmd30.sc 6:25] Arbiter arbiter ( // @[cmd30.sc 6:25] .io_in_0_ready(arbiter_io_in_0_ready), .io_in_0_valid(arbiter_io_in_0_valid), .io_in_0_bits(arbiter_io_in_0_bits), .io_in_1_ready(arbiter_io_in_1_ready), .io_in_1_valid(arbiter_io_in_1_valid), .io_in_1_bits(arbiter_io_in_1_bits), .io_out_ready(arbiter_io_out_ready), .io_out_valid(arbiter_io_out_valid), .io_out_bits(arbiter_io_out_bits) ); assign io_in_0_ready = arbiter_io_in_0_ready; // @[cmd30.sc 7:19] assign io_in_1_ready = arbiter_io_in_1_ready; // @[cmd30.sc 7:19] assign io_out_valid = arbiter_io_out_valid; // @[cmd30.sc 8:12] assign io_out_bits = arbiter_io_out_bits; // @[cmd30.sc 8:12] assign arbiter_io_in_0_valid = io_in_0_valid; // @[cmd30.sc 7:19] assign arbiter_io_in_0_bits = io_in_0_bits; // @[cmd30.sc 7:19] assign arbiter_io_in_1_valid = io_in_1_valid; // @[cmd30.sc 7:19] assign arbiter_io_in_1_bits = io_in_1_bits; // @[cmd30.sc 7:19] assign arbiter_io_out_ready = io_out_ready; // @[cmd30.sc 8:12] endmodule

Bitwise 操作

PopCount(chisel 硬體數值)

  • 以 UInt 形式傳回輸入中為 1 的位元的數量
  • 例如:
    • in = 0b11001010
    • out = 4

Reverse(chisel 硬體數值)

  • 傳回輸入的位元反轉
  • 例如:
    • in = 0b11001010
    • out = 0b01010011

OneHot encoding

  • 一種整數編碼,該位元組或向量裏僅容許其中一位爲 1,其他位都必須爲 0

UIntToOH(chisel 硬體數值)

  • UInt → OneHot

OHToUInt(chisel 硬體數值)

  • OneHot → UInt

Mux

PriorityMux(select: Bool, value: Data)

  • 十個輸入的多工器(每個輸入為 7 位元寬)
PriorityMux(select, Vec(10, UInt(7.W)))

Mux1H(select: Bool, value: Data)

  • 當保證選擇訊號中恰好有一個為高電位時,Mux1H 能提供高效率的實作
  • 如果 select 不為 OneHot 編碼,則行為未定義
Mux1H(select, Vec(10, UInt(7.W)))

Counter

Counter(指定的限制值)

  • 每個週期可遞增一次,直到達到某個指定的限制值(此時它會溢位)
    • 例如 Counter(3)
    • 其週期性的值為: 0, 1, 2, 0, 1, 2, 0
  • 它不是模組,其值是可讀的
    • 使用counter.value來讀計數器的值
    • 使用counter.inc()來為計數器加一
      • 好像只有辦法加 1
      • 接續兩次counter.inc()並沒有辦法使計數器在一個時脈中加 2
  • 範例:
/** Modulo-3 counter that advances by one on every clock cycle in which
  * `io.count` is asserted and exposes its current value on `io.out`.
  */
class count extends Module {
  val io = IO(new Bundle {
    val count = Input(Bool())
    val out = Output(UInt(2.W))
  })

  val counter = Counter(3) // 3-count Counter (outputs range [0...2])

  // Only step the counter while the enable input is high; it wraps from 2 back to 0.
  when(io.count) {
    counter.inc()
  }

  io.out := counter.value
}

println(getVerilog(new count()))
module count( input clock, input reset, input io_count, output [1:0] io_out ); `ifdef RANDOMIZE_REG_INIT reg [31:0] _RAND_0; `endif // RANDOMIZE_REG_INIT reg [1:0] value; // @[Counter.scala 60:40] wire wrap = value == 2'h2; // @[Counter.scala 72:24] wire [1:0] _value_T_1 = value + 2'h1; // @[Counter.scala 76:24] assign io_out = value; // @[cmd40.sc 11:12] always @(posedge clock) begin if (reset) begin // @[Counter.scala 60:40] value <= 2'h0; // @[Counter.scala 60:40] end else if (io_count) begin // @[cmd40.sc 8:20] if (wrap) begin // @[Counter.scala 86:20] value <= 2'h0; // @[Counter.scala 86:28] end else begin value <= _value_T_1; // @[Counter.scala 76:15] end end end endmodule

高階函數

  • 指以函數作為參數的函數
  • map 和 reduce 都是高階函數
  • 當需要元組解包時,使用 case 語句,如 case (a, b) => a * b
    • 元組內只有單一個元素時也可以使用

chisel 設計者的核心思想

  • 連 for 都覺得他冗長(我是覺得沒必要啦~)
  • 考慮兩個等價的 FIR 程式碼
// Per-tap products: tap 0 uses the live input, tap i > 0 uses the delayed sample regs(i - 1).
val muls = Wire(Vec(length, UInt(8.W)))
for (i <- 0 until length) {
  if (i == 0) muls(i) := io.in * io.consts(i)
  else muls(i) := regs(i - 1) * io.consts(i)
}

// Running sum of the products: scan(i) holds muls(0) + ... + muls(i).
val scan = Wire(Vec(length, UInt(8.W)))
for (i <- 0 until length) {
  if (i == 0) scan(i) := muls(i)
  else scan(i) := muls(i) + scan(i - 1)
}

// The last running sum is the filter output.
io.out := scan(length - 1)
io.out := (taps zip io.consts).map { case (a, b) => a * b }.reduce(_ + _)
  • 上面程式碼的解析
    • taps: 假設他是取樣點的 list,意即
      • taps(0) = io.in
      • taps(1) = regs(0)
    • (taps zip io.consts): 下面有解釋
    • .map: 高階函數,對集合中的每個元素套用給定的函數,並回傳由結果組成的新集合(此處不是指鍵值對的 Map 資料結構)
      • 可以設計映射的規則
    • .reduce(): 參考【1】Scala 和 Chisel 語法簡記
      • 當 list 為空時,reduce 會拋出例外(UnsupportedOperationException)
  • zip: 雖然【2】組合電路、序向電路和 Control Flow有提到 zip 的用法,但這邊在更詳細解析一下
    • 語法: def zip[B](that: GenIterable[B]): Iterable[(A, B)]
    • list_a zip list_b 和 list_a.zip(list_b) 都是可行的寫法
    • 從範例中可以發現 zip 會受限於最短的那個 list
val list = List(1, 2, 3, 4)
val list1 = List("A", "B", "C")

// Zip in both orders, using infix and method-call syntax.
// The result is truncated to the length of the shorter list.
val list2 = list zip list1
val list3 = list1 zip list
val list4 = list.zip(list1)
val list5 = list1.zip(list)

// Print the zipped lists.
println(list2)
println(list3)
println(list4)
println(list5)
List((1,A), (2,B), (3,C))
List((A,1), (B,2), (C,3))
List((1,A), (2,B), (3,C))
List((A,1), (B,2), (C,3))

zipWithIndex

  • 語法: zipWithIndex: List[(A, Int)]
  • 不帶參數,但傳回一個列表,其中每個元素都是原始元素和索引(第一個為零)的元素組
println(List("a", "b", "c", "d").zipWithIndex)
List((a,0), (b,1), (c,2), (d,3))

Fold

  • 與 reduce 非常相似,只不過可以指定初始累加值
  • 語法: fold(z: A)(op: (A, A) ⇒ A): A
  • 和 reduce 不同,它不會因空列表而失敗
println(List(1, 2, 3, 4).fold(2)(_ * _))
48

scan、reduce 和 fold 的其他成員

scanLeft/scanRight

  • 從一個初始值開始,有方向性的進行累積的 op 操作
  • 回傳累積過程的集合

reduceLeft/reduceRight

  • 將列表內元素有方向性的進行 op 操作的聚合

foldLeft/foldRight

  • 指定初始累加值後,將列表內元素有方向性的進行 op 操作的聚合

自定義函數

自定義函數

  • 若要不產生輸出,請傳回 Unit 類型
    • 就類似 C 的 void
  • Scala 中的函數是物件(object)。這意味著我們可以將一個函數分配給 val 並將其作為參數傳遞給類別(class)、物件或其他函數(def)
    • 所以創建 val 而不是 def,是因為使用 val 可以將該函數傳遞給其他函數
  • def 和 val 的宣告和差別
// These are normal functions (methods) declared with `def`.
def plus1funct(x: Int): Int = x + 1
def times2funct(x: Int): Int = x * 2

// These are functions stored as vals, i.e. function values.
// The first one explicitly specifies the function type (Int => Int);
// the second one lets the compiler infer it from the annotated parameter.
val plus1val: Int => Int = x => x + 1
val times2val = (x: Int) => x * 2

// Calling both looks the same with positional arguments.
plus1funct(4)
plus1val(4)
// Named arguments are only accepted by the `def` version.
plus1funct(x=4)
//plus1val(x=4) // this doesn't work
defined function plus1funct
defined function times2funct
plus1val: Int => Int = ammonite.$sess.cmd7$Helper$$Lambda$2933/628001821@1e527155
times2val: Int => Int = ammonite.$sess.cmd7$Helper$$Lambda$2934/975951512@56eebcd1
res7_4: Int = 5
res7_5: Int = 5
res7_6: Int = 5
  • 使用 val 將函式作為物件傳遞的範例
    • 很像 C 的函式指標的用法
// create our functions as vals so they can be passed around as values
val plus1 = (x: Int) => x + 1
val times2 = (x: Int) => x * 2

// pass them to map, a list function
val myList = List(1, 2, 5, 9)
val myListPlus = myList.map(plus1)
val myListTimes = myList.map(times2)

/** Applies `op` to `x` repeatedly, `n` times, using recursion.
  *
  * @param x  starting value
  * @param n  number of applications; `x` is returned unchanged when `n <= 0`
  * @param op the operation to apply
  * @return `op(op(...op(x)))` with `n` nested applications
  */
def opN(x: Int, n: Int, op: Int => Int): Int = {
  if (n <= 0) {
    x
  } else {
    opN(op(x), n - 1, op)
  }
}

opN(7, 3, plus1)
opN(7, 3, times2)
plus1: Int => Int = ammonite.$sess.cmd8$Helper$$Lambda$2972/1279130160@5c7f9e3f
times2: Int => Int = ammonite.$sess.cmd8$Helper$$Lambda$2973/1888893016@eaa4a49
myList: List[Int] = List(1, 2, 5, 9)
myListPlus: List[Int] = List(2, 3, 6, 10)
myListTimes: List[Int] = List(2, 4, 10, 18)
defined function opN
res8_6: Int = 10
res8_7: Int = 56
  • C 的函式指標範例
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* F: pointer to a function that takes (double, int) and returns double. */
typedef double (*F)(double, int);

/* The operations are only declared here; their definitions are not part of this example. */
double power(double, int);
double multiply(double, int);
double divide(double, int);

/* Operation names accepted on the command line, index-aligned with funcArray.
 * NOTE(review): the original snippet used `operation` without declaring it;
 * these names are an assumption derived from the function names. */
static const char *operation[] = { "power", "multiply", "divide" };

/* Dispatch table of function pointers. */
double (*funcArray[])(double, int) = {
    power,
    multiply,
    divide,
};

/* Calls the function passed in as `func`, which is the point of the example. */
double powerpower(double x, int n, F func) {
    return func(x, n);
}

int main(int argc, char *argv[]) {
    const int size = (int)(sizeof(funcArray) / sizeof(funcArray[0]));
    double x, ans;
    int n, i;

    /* NOTE(review): x and n were undeclared in the original; reading them from
     * argv[2] and argv[3] is an assumption. */
    if (argc < 4) {
        fprintf(stderr, "usage: %s <operation> <x> <n>\n", argv[0]);
        return 1;
    }
    x = strtod(argv[2], NULL);
    n = atoi(argv[3]);

    /* Look up the requested operation by name and dispatch through the table. */
    for (i = 0; i < size; i++) {
        if (strcmp(argv[1], operation[i]) == 0) {
            ans = powerpower(x, n, funcArray[i]);
            printf("%f\n", ans);
            return 0;
        }
    }

    fprintf(stderr, "unknown operation: %s\n", argv[1]);
    return 1;
}

匿名函數 Anonymous Functions

  • 顧名思義,匿名函數是無名的
    • 例如,如果某個函數我們只會使用一次,就無需用 val 為它建立變數名稱
val myList = List(5, 6, 7, 8)

// Anonymous function with an explicitly typed parameter.
myList.map( (x: Int) => x + 1 )
// The same function written with the placeholder syntax.
myList.map(_ + 1)
myList: List[Int] = List(5, 6, 7, 8)
res10_1: List[Int] = List(6, 7, 8, 9)
res10_2: List[Int] = List(6, 7, 8, 9)

Question

  • 當使用不帶參數的函數時,可能會出現令人困惑的情況
    • 因為每次傳遞都是重新呼叫函數
    • 因此下面範例的值才會變動
import scala.util.Random

// both x and y call the nextInt function, but x is evaluated immediately and y is a function
val x = Random.nextInt()
def y = Random.nextInt()

// x was previously evaluated, so it is a constant
println(s"x = $x")
println(s"x = $x")

// y is a function and gets reevaluated at each call, thus these produce different results
println(s"y = $y")
println(s"y = $y")
x = 1353115134
x = 1353115134
y = 1624387838
y = -867619323

Chisel 中的函數式程式設計

範例1 FIR

// get some math functions
import scala.math.{abs, round, cos, Pi, pow}

// Simple triangular window: `length` coefficients scaled by 2^bitwidth and rounded to Int.
val TriangularWindow: (Int, Int) => Seq[Int] = (length, bitwidth) => {
  val raw_coeffs = (0 until length).map( (x: Int) => 1-abs((x.toDouble-(length-1)/2.0)/((length-1)/2.0)) )
  val scaled_coeffs = raw_coeffs.map( (x: Double) => round(x * pow(2, bitwidth)).toInt)
  scaled_coeffs
}

// Hamming window: `length` coefficients scaled by 2^bitwidth and rounded to Int.
val HammingWindow: (Int, Int) => Seq[Int] = (length, bitwidth) => {
  val raw_coeffs = (0 until length).map( (x: Int) => 0.54 - 0.46*cos(2*Pi*x/(length-1)))
  val scaled_coeffs = raw_coeffs.map( (x: Double) => round(x * pow(2, bitwidth)).toInt)
  scaled_coeffs
}

/** FIR filter whose coefficients are produced by a window function passed in as a value.
  *
  * @param length   number of taps
  * @param bitwidth width of the input samples; also forwarded to the window function
  * @param window   function (length, bitwidth) => coefficients
  */
class MyFir(length: Int, bitwidth: Int, window: (Int, Int) => Seq[Int]) extends Module {
  val io = IO(new Bundle {
    val in = Input(UInt(bitwidth.W))
    val out = Output(UInt((bitwidth*2+length-1).W)) // expect bit growth, conservative but lazy
  })

  // calculate the coefficients using the provided window function, mapping to UInts
  val coeffs = window(length, bitwidth).map(_.U)

  // create an array holding the output of the delays
  // note: we avoid using a Vec here since we don't need dynamic indexing
  // scan starts from io.in and registers each element once more than the previous one
  val delays = Seq.fill(length)(Wire(UInt(bitwidth.W))).scan(io.in)( (prev: UInt, next: UInt) => {
    next := RegNext(prev)
    next
  })

  // multiply, putting result in "mults"
  // zip stops at the shorter sequence, so only the first `length` taps are used
  val mults = delays.zip(coeffs).map{ case(delay: UInt, coeff: UInt) => delay * coeff }

  // add up multiplier outputs with bit growth
  val result = mults.reduce(_+&_)

  // connect output
  io.out := result
}

visualize(() => new MyFir(7, 12, TriangularWindow))
println(getVerilog(new MyFir(7, 12, TriangularWindow)))
module MyFir( input clock, input reset, input [11:0] io_in, output [29:0] io_out ); `ifdef RANDOMIZE_REG_INIT reg [31:0] _RAND_0; reg [31:0] _RAND_1; reg [31:0] _RAND_2; reg [31:0] _RAND_3; reg [31:0] _RAND_4; reg [31:0] _RAND_5; `endif // RANDOMIZE_REG_INIT reg [11:0] REG; // @[cmd9.sc 13:20] reg [11:0] REG_1; // @[cmd9.sc 13:20] reg [11:0] REG_2; // @[cmd9.sc 13:20] reg [11:0] REG_3; // @[cmd9.sc 13:20] reg [11:0] REG_4; // @[cmd9.sc 13:20] reg [11:0] REG_5; // @[cmd9.sc 13:20] wire [12:0] mults_0 = io_in * 1'h0; // @[cmd9.sc 18:79] wire [22:0] mults_1 = REG * 11'h555; // @[cmd9.sc 18:79] wire [23:0] mults_2 = REG_1 * 12'haab; // @[cmd9.sc 18:79] wire [24:0] mults_3 = REG_2 * 13'h1000; // @[cmd9.sc 18:79] wire [23:0] mults_4 = REG_3 * 12'haab; // @[cmd9.sc 18:79] wire [22:0] mults_5 = REG_4 * 11'h555; // @[cmd9.sc 18:79] wire [12:0] mults_6 = REG_5 * 1'h0; // @[cmd9.sc 18:79] wire [22:0] _GEN_0 = {{10'd0}, mults_0}; // @[cmd9.sc 21:30] wire [23:0] _T = _GEN_0 + mults_1; // @[cmd9.sc 21:30] wire [24:0] _T_1 = _T + mults_2; // @[cmd9.sc 21:30] wire [25:0] _T_2 = _T_1 + mults_3; // @[cmd9.sc 21:30] wire [25:0] _GEN_1 = {{2'd0}, mults_4}; // @[cmd9.sc 21:30] wire [26:0] _T_3 = _T_2 + _GEN_1; // @[cmd9.sc 21:30] wire [26:0] _GEN_2 = {{4'd0}, mults_5}; // @[cmd9.sc 21:30] wire [27:0] _T_4 = _T_3 + _GEN_2; // @[cmd9.sc 21:30] wire [27:0] _GEN_3 = {{15'd0}, mults_6}; // @[cmd9.sc 21:30] wire [28:0] result = _T_4 + _GEN_3; // @[cmd9.sc 21:30] assign io_out = {{1'd0}, result}; // @[cmd9.sc 21:30] always @(posedge clock) begin REG <= io_in; // @[cmd9.sc 13:20] REG_1 <= REG; // @[cmd9.sc 12:37 cmd9.sc 13:10] REG_2 <= REG_1; // @[cmd9.sc 12:37 cmd9.sc 13:10] REG_3 <= REG_2; // @[cmd9.sc 12:37 cmd9.sc 13:10] REG_4 <= REG_3; // @[cmd9.sc 12:37 cmd9.sc 13:10] REG_5 <= REG_4; // @[cmd9.sc 12:37 cmd9.sc 13:10] end endmodule

image

範例2 類神經網路

// Activation functions expressed as function values so they can be passed to Neuron.
// Step: 0 when the input is <= 0, otherwise 1.
val Step: FixedPoint => FixedPoint = x => Mux(x <= 0.F(8.BP), 0.F(8.BP), 1.F(8.BP))
// ReLU: 0 when the input is <= 0, otherwise the input itself.
val ReLU: FixedPoint => FixedPoint = x => Mux(x <= 0.F(8.BP), 0.F(8.BP), x)

/** Single neuron: multiply-accumulate of inputs and weights followed by an activation.
  *
  * @param inputs number of input/weight pairs
  * @param act    activation function applied to the accumulated sum
  */
class Neuron(inputs: Int, act: FixedPoint => FixedPoint) extends Module {
  val io = IO(new Bundle {
    val in = Input(Vec(inputs, FixedPoint(16.W, 8.BP)))
    val weights = Input(Vec(inputs, FixedPoint(16.W, 8.BP)))
    val out = Output(FixedPoint(16.W, 8.BP))
  })

  // Pair every input with its weight, multiply each pair, then sum the products.
  val mac = io.in.zip(io.weights).map{ case(a: FixedPoint, b: FixedPoint) => a*b }.reduce(_+_)
  io.out := act(mac)
}

println(getVerilog(new Neuron(2, Step)))
println(getVerilog(new Neuron(2, ReLU)))
// Step module Neuron( input clock, input reset, input [15:0] io_in_0, input [15:0] io_in_1, input [15:0] io_weights_0, input [15:0] io_weights_1, output [15:0] io_out ); wire [31:0] _T = $signed(io_in_0) * $signed(io_weights_0); // @[cmd12.sc 8:79] wire [31:0] _T_1 = $signed(io_in_1) * $signed(io_weights_1); // @[cmd12.sc 8:79] wire [31:0] mac = $signed(_T) + $signed(_T_1); // @[cmd12.sc 8:91] wire [9:0] _T_5 = $signed(mac) <= 32'sh0 ? $signed(10'sh0) : $signed(10'sh100); // @[cmd13.sc 1:46] assign io_out = {{6{_T_5[9]}},_T_5}; // @[cmd13.sc 1:46] endmodule
// ReLU(註: 以下輸出與上方 Step 的輸出完全相同,其中的常數 10'sh100 是 Step 的輸出值 1.0,疑似貼錯;ReLU 的輸出應在 mac > 0 時傳回 mac 本身,需重新產生) module Neuron( input clock, input reset, input [15:0] io_in_0, input [15:0] io_in_1, input [15:0] io_weights_0, input [15:0] io_weights_1, output [15:0] io_out ); wire [31:0] _T = $signed(io_in_0) * $signed(io_weights_0); // @[cmd12.sc 8:79] wire [31:0] _T_1 = $signed(io_in_1) * $signed(io_weights_1); // @[cmd12.sc 8:79] wire [31:0] mac = $signed(_T) + $signed(_T_1); // @[cmd12.sc 8:91] wire [9:0] _T_5 = $signed(mac) <= 32'sh0 ? $signed(10'sh0) : $signed(10'sh100); // @[cmd13.sc 1:46] assign io_out = {{6{_T_5[9]}},_T_5}; // @[cmd13.sc 1:46] endmodule

參考