姜冠宇
RISC-V B extension. i.e., Zba, Zbb, and Zbs.
FDEXBundle
/** Signals held between the fetch/decode stage and the execute stage
  * (the "FD_EX" pipeline register).
  *
  * Field order is part of the hardware layout of the bundle; do not reorder.
  */
class FDEXBundle extends Bundle {
// Address of the fetched instruction (taken from the instruction-fetch unit).
val instruction_address = UInt(Parameters.AddrWidth)
// Raw instruction word (taken from the instruction-fetch unit).
val instruction = UInt(Parameters.InstructionWidth)
// Source register indices produced by decode; also compared against the
// EX_WB write address by the forwarding logic.
val reg_read_address1 = UInt(Parameters.PhysicalRegisterAddrWidth)
val reg_read_address2 = UInt(Parameters.PhysicalRegisterAddrWidth)
// Immediate produced by decode.
val immediate = UInt(Parameters.DataWidth)
// ALU operand source selectors produced by decode (1 bit each).
val ex_aluop1_source = UInt(1.W)
val ex_aluop2_source = UInt(1.W)
// Set from ex_wb.if_jump_flag; when set, the EX_WB write enables and jump flag
// are forced low for this instruction.
val stall = Bool()
// Memory access controls produced by decode.
val memory_read_enable = Bool()
val memory_write_enable = Bool()
// Write-back source selector produced by decode (2 bits).
val wb_reg_write_source = UInt(2.W)
// Destination register write enable and index produced by decode.
val reg_write_enable = Bool()
val reg_write_address = UInt(Parameters.PhysicalRegisterAddrWidth)
}
Pipeline
// ================= FD_EX pipeline register =================
// Values coming straight from instruction fetch.
fd_ex.instruction_address := inst_fetch.io.instruction_address
fd_ex.instruction := inst_fetch.io.instruction

// Register indices selected by decode.
fd_ex.reg_read_address1 := id.io.regs_reg1_read_address
fd_ex.reg_read_address2 := id.io.regs_reg2_read_address
fd_ex.reg_write_address := id.io.reg_write_address
fd_ex.reg_write_enable := id.io.reg_write_enable

// ALU operand selection and immediate from decode.
fd_ex.ex_aluop1_source := id.io.ex_aluop1_source
fd_ex.ex_aluop2_source := id.io.ex_aluop2_source
fd_ex.immediate := id.io.ex_immediate

// Memory and write-back controls from decode.
fd_ex.memory_read_enable := id.io.memory_read_enable
fd_ex.memory_write_enable := id.io.memory_write_enable
fd_ex.wb_reg_write_source := id.io.wb_reg_write_source

// The instruction entering FD_EX is marked as stalled whenever EX_WB holds a
// taken jump.
fd_ex.stall := ex_wb.if_jump_flag
// ===========================================================
EXWBBundle
/** Signals held between the execute stage and the memory/write-back stage
  * (the "EX_WB" pipeline register).
  *
  * Field order is part of the hardware layout of the bundle; do not reorder.
  */
class EXWBBundle extends Bundle {
// Instruction word and its address, copied from FD_EX.
val instruction = UInt(Parameters.InstructionWidth)
val instruction_address = UInt(Parameters.AddrWidth)
// Result produced by the execute stage; also used as the forwarding value
// when the instruction is not a memory read.
val mem_alu_result = UInt(Parameters.DataWidth)
// Second register operand as seen by the execute stage.
val reg2_data = UInt(Parameters.DataWidth)
// Jump decision and target from the execute stage. The flag is forced low
// when the instruction is squashed.
val if_jump_flag = Bool()
// NOTE(review): declared with DataWidth rather than AddrWidth — confirm intended.
val if_jump_address = UInt(Parameters.DataWidth)
// NOTE(review): not driven by the EX_WB wiring shown alongside this bundle —
// confirm whether it is used elsewhere.
val stall = Bool()
// Memory access controls; memory_read_enable also selects the load data as
// the forwarding value.
val memory_read_enable = Bool()
val memory_write_enable = Bool()
// Write-back source selector (2 bits), copied from FD_EX.
val wb_reg_write_source = UInt(2.W)
// Destination register write enable and index.
val reg_write_enable = Bool()
val reg_write_address = UInt(Parameters.PhysicalRegisterAddrWidth)
}
Pipeline
// ================= EX_WB pipeline register =================
// Fields passed through unchanged from FD_EX.
ex_wb.instruction_address := fd_ex.instruction_address
ex_wb.instruction := fd_ex.instruction
ex_wb.reg_write_address := fd_ex.reg_write_address
ex_wb.memory_read_enable := fd_ex.memory_read_enable
ex_wb.wb_reg_write_source := fd_ex.wb_reg_write_source

// Results produced by the execute stage.
ex_wb.mem_alu_result := ex.io.mem_alu_result
ex_wb.reg2_data := ex.io.reg2_data
ex_wb.if_jump_address := ex.io.if_jump_address

// The instruction in EX is squashed when it was marked as stalled or when the
// instruction ahead of it is a taken jump. A squashed instruction must not
// write a register, write memory, or redirect the fetch.
val exWbBubble = fd_ex.stall || ex_wb.if_jump_flag
ex_wb.if_jump_flag := !exWbBubble && ex.io.if_jump_flag
ex_wb.reg_write_enable := !exWbBubble && fd_ex.reg_write_enable
ex_wb.memory_write_enable := !exWbBubble && fd_ex.memory_write_enable
forwarding
// Value the instruction in EX_WB is about to write back: the load data for a
// memory read, otherwise the execute-stage result.
val wbForwardValue = Mux(
  ex_wb.memory_read_enable,
  mem.io.wb_memory_read_data,
  ex_wb.mem_alu_result
)

// An operand is forwarded when EX_WB writes the register that the instruction
// in EX reads; otherwise the register-file output is used.
val forwardToOp1 = ex_wb.reg_write_enable &&
  (ex_wb.reg_write_address === fd_ex.reg_read_address1)
val forwardToOp2 = ex_wb.reg_write_enable &&
  (ex_wb.reg_write_address === fd_ex.reg_read_address2)

ex.io.reg1_data := Mux(forwardToOp1, wbForwardValue, regs.io.read_data1)
ex.io.reg2_data := Mux(forwardToOp2, wbForwardValue, regs.io.read_data2)
Hazard detection with flush
// Normal case: neither a stalled instruction nor a taken jump ahead of it, so
// the control signals advance into EX_WB unchanged.
when(!fd_ex.stall && !ex_wb.if_jump_flag) {
  ex_wb.if_jump_flag := ex.io.if_jump_flag
  ex_wb.reg_write_enable := fd_ex.reg_write_enable
  ex_wb.memory_write_enable := fd_ex.memory_write_enable
}.otherwise {
  // Flush: turn the instruction into a bubble by clearing every side effect.
  ex_wb.memory_write_enable := false.B
  ex_wb.reg_write_enable := false.B
  ex_wb.if_jump_flag := false.B
}
Each of these smaller extensions is grouped by common function and use case, and each has its own Zb*-extension name.
Some instructions are available in only one extension, while others are available in several.
sh1add, sh2add, sh3add,
andn, orn, xnor,
clz, ctz, cpop,
max, maxu, min, minu,
sext.b, sext.h, zext.h
rol, ror, rori,
orc.b, rev8
clmul, clmulh, clmulr,
bclr, bclri, bext, bexti, binv, binvi, bset, bseti
0010011
(I-type)Decode order : Opcode -> Funct3 -> Funct7 -> Shamt
funct7 | shamt | rs1 | funct3 | rd | opcode | Instruction(s) |
---|---|---|---|---|---|---|
IB1 | ||||||
0010100 | 5 bits | 5 bits | 001 | 5 bits | 0010011 | bseti |
0100100 | 5 bits | 5 bits | 001 | 5 bits | 0010011 | bclri |
0110000 | 00000 | 5 bits | 001 | 5 bits | 0010011 | clz |
0110000 | 00001 | 5 bits | 001 | 5 bits | 0010011 | ctz |
0110000 | 00010 | 5 bits | 001 | 5 bits | 0010011 | cpop |
0110000 | 00100 | 5 bits | 001 | 5 bits | 0010011 | sext.b |
0110000 | 00101 | 5 bits | 001 | 5 bits | 0010011 | sext.h |
0110100 | 5 bits | 5 bits | 001 | 5 bits | 0010011 | binvi |
IB2 | ||||||
0010100 | 00111 | 5 bits | 101 | 5 bits | 0010011 | orc.b |
0100100 | 5 bits | 5 bits | 101 | 5 bits | 0010011 | bexti |
0110000 | 5 bits | 5 bits | 101 | 5 bits | 0010011 | rori |
0110100 | 11000 | 5 bits | 101 | 5 bits | 0010011 | rev8 |
// funct3 values of the I-type ALU instructions (opcode 0010011).
// funct3 001 and 101 are shared with B-extension instructions, which are
// told apart by funct7 (and, for some, by the shamt field).
object InstructionsTypeI {
val addi = 0.U
val slli = 1.U // also clz, ctz, cpop, sext.b, sext.h, bclri, binvi, bseti
val slti = 2.U
val sltiu = 3.U
val xori = 4.U
val sri = 5.U // srli/srai; also rori, orc.b, rev8, bexti
val ori = 6.U
val andi = 7.U
}
// ...
// IB: B-extension instructions that share opcode 0010011 with the base I-type
// ALU instructions.
// IB1 holds the funct7 values used when funct3 == 001.
object IB1{
val slli = "b0000000".U
val bseti = "b0010100".U
val bclri = "b0100100".U
val binvi = "b0110100".U
// funct7 shared by clz, ctz, cpop, sext.b and sext.h; the shamt field picks
// the instruction (see object Zbb1).
val Zbb1 = "b0110000".U
}
// funct7 values for opcode 0010011 when funct3 == 101.
object IB2{
val srli = "b0000000".U
val srai = "b0100000".U
val orcb = "b0010100".U // orc.b additionally fixes shamt to 00111
val bexti = "b0100100".U
val rori = "b0110000".U
val rev8 = "b0110100".U // rev8 additionally fixes shamt to 11000
}
// shamt-field values that select the instruction within the funct7 == 0110000,
// funct3 == 001 group of opcode 0010011.
// Encodings 00000-00010 are clz/ctz/cpop; sext.b and sext.h are 00100 and
// 00101 (00011 is not assigned to any of these instructions).
object Zbb1{
  val clz = 0.U
  val ctz = 1.U
  val cpop = 2.U
  val sextb = 4.U
  val sexth = 5.U
}
// ...
switch(io.opcode) {
is(InstructionTypes.IBex) {
  // I-type ALU decode (opcode 0010011).
  // Decode order: funct3 -> funct7 -> shamt.
  // Every result is an ALUFunctions value, the enum the ALU switches on.
  io.alu_funct := MuxLookup(
    io.funct3,
    ALUFunctions.zero,
    IndexedSeq(
      InstructionsTypeI.addi -> ALUFunctions.add,
      // funct3 == 001: slli unless funct7 selects a B-extension instruction.
      InstructionsTypeI.slli -> MuxLookup(
        io.funct7,
        ALUFunctions.sll,
        IndexedSeq(
          IB1.bseti -> ALUFunctions.bseti,
          IB1.binvi -> ALUFunctions.binvi,
          IB1.bclri -> ALUFunctions.bclri,
          // funct7 == 0110000: the shamt field picks the unary operation;
          // clz (shamt 00000) is the default.
          IB1.Zbb1 -> MuxLookup(
            shamt,
            ALUFunctions.clz,
            IndexedSeq(
              Zbb1.ctz -> ALUFunctions.ctz,
              Zbb1.cpop -> ALUFunctions.cpop,
              Zbb1.sextb -> ALUFunctions.sextb,
              Zbb1.sexth -> ALUFunctions.sexth
            )
          )
        )
      ),
      InstructionsTypeI.slti -> ALUFunctions.slt,
      InstructionsTypeI.sltiu -> ALUFunctions.sltu,
      InstructionsTypeI.xori -> ALUFunctions.xor,
      InstructionsTypeI.ori -> ALUFunctions.or,
      InstructionsTypeI.andi -> ALUFunctions.and,
      // funct3 == 101: srli unless funct7 selects srai or a B-extension
      // instruction. The whole 7-bit funct7 is compared, since the IB2
      // constants are 7-bit values.
      InstructionsTypeI.sri -> MuxLookup(
        io.funct7,
        ALUFunctions.srl,
        IndexedSeq(
          IB2.srai -> ALUFunctions.sra,
          IB2.rori -> ALUFunctions.rori,
          IB2.rev8 -> ALUFunctions.rev8,
          IB2.orcb -> ALUFunctions.orcb,
          IB2.bexti -> ALUFunctions.bexti
        )
      )
    )
  )
}
0110011
(RMType)Decode order : Opcode -> Funct7 -> Funct3 -> Shamt
funct7 | rs2 | rs1 | funct3 | rd | opcode | instruction |
---|---|---|---|---|---|---|
RB1 | ||||||
0000101 | 5 bits | 5 bits | 001 | 5 bits | 0110011 | clmul |
0000101 | 5 bits | 5 bits | 010 | 5 bits | 0110011 | clmulr |
0000101 | 5 bits | 5 bits | 011 | 5 bits | 0110011 | clmulh |
0000101 | 5 bits | 5 bits | 100 | 5 bits | 0110011 | min |
0000101 | 5 bits | 5 bits | 101 | 5 bits | 0110011 | minu |
0000101 | 5 bits | 5 bits | 110 | 5 bits | 0110011 | max |
0000101 | 5 bits | 5 bits | 111 | 5 bits | 0110011 | maxu |
0000100 | 00000 | 5 bits | 100 | 5 bits | 0110011 | zext.h |
RB2 | ||||||
0010000 | 5 bits | 5 bits | 010 | 5 bits | 0110011 | sh1add |
0010000 | 5 bits | 5 bits | 100 | 5 bits | 0110011 | sh2add |
0010000 | 5 bits | 5 bits | 110 | 5 bits | 0110011 | sh3add |
0010100 | 5 bits | 5 bits | 001 | 5 bits | 0110011 | bset |
RB3 | ||||||
0100000 | 5 bits | 5 bits | 100 | 5 bits | 0110011 | xnor |
0100000 | 5 bits | 5 bits | 110 | 5 bits | 0110011 | orn |
0100000 | 5 bits | 5 bits | 111 | 5 bits | 0110011 | andn |
RB4 | ||||||
0100100 | 5 bits | 5 bits | 001 | 5 bits | 0110011 | bclr |
0100100 | 5 bits | 5 bits | 101 | 5 bits | 0110011 | bext |
RB5 | ||||||
0110000 | 5 bits | 5 bits | 001 | 5 bits | 0110011 | rol |
0110000 | 5 bits | 5 bits | 101 | 5 bits | 0110011 | ror |
binv | ||||||
0110100 | 5 bits | 5 bits | 001 | 5 bits | 0110011 | binv |
// RB: B-extension instructions that share opcode 0110011 with the base
// R-type instructions.
// funct7 of the base R-type instructions sll, slt, xor, srl, or, and is 0;
// sra uses 0100000.
// This object lists the funct7 values that select an instruction group.
object InstructionsTypeRorRB {
val InstructionsTypeR = "b0000000".U // base R-type group
val sraorRB3 = "b0100000".U // sra, plus xnor / orn / andn
val zexth = "b0000100".U
val bset = "b0010100".U
val binv = "b0110100".U
val RB1 = "b0000101".U // clmul, clmulr, clmulh, min, minu, max, maxu
val RB2 = "b0010000".U // sh1add, sh2add, sh3add
val RB4 = "b0100100".U // bclr, bext
val RB5 = "b0110000".U // rol, ror
}
// funct3 values within the funct7 == 0000101 group (carry-less multiply and
// min/max).
object RB1 {
// funct3
val clmul = "b001".U
val clmulr = "b010".U
val clmulh = "b011".U
val min = "b100".U
val minu = "b101".U
val max = "b110".U
val maxu = "b111".U
}
// Zba: funct3 values within the funct7 == 0010000 group (shift-and-add).
object RB2 {
// funct3
val sh1add = "b010".U
val sh2add = "b100".U
val sh3add = "b110".U
}
// funct3 values within the funct7 == 0100000 group: the base sra plus the
// negated logical operations.
object RB3 {
// funct3
val sra = "b101".U
val xnor = "b100".U
val orn = "b110".U
val andn = "b111".U
}
// funct3 values within the funct7 == 0100100 group (single-bit clear/extract).
object RB4 {
val bclr = "b001".U
val bext = "b101".U
}
// funct3 values within the funct7 == 0110000 group (rotates).
object RB5 {
val rol = "b001".U
val ror = "b101".U
}
is(InstructionTypes.RM) {
  // R-type ALU decode (opcode 0110011).
  // Decode order: funct7 picks the instruction group, funct3 the member.
  // Every result is an ALUFunctions value, the enum the ALU switches on.
  io.alu_funct := MuxLookup(
    io.funct7,
    ALUFunctions.zero,
    IndexedSeq(
      // funct7 == 0000000: base R-type instructions. sub and sra carry
      // funct7 == 0100000 and are therefore decoded in the sraorRB3 group.
      InstructionsTypeRorRB.InstructionsTypeR -> MuxLookup(
        io.funct3,
        ALUFunctions.zero,
        IndexedSeq(
          InstructionsTypeR.add_sub -> ALUFunctions.add,
          InstructionsTypeR.sll -> ALUFunctions.sll,
          InstructionsTypeR.slt -> ALUFunctions.slt,
          InstructionsTypeR.sltu -> ALUFunctions.sltu,
          InstructionsTypeR.xor -> ALUFunctions.xor,
          InstructionsTypeR.or -> ALUFunctions.or,
          InstructionsTypeR.and -> ALUFunctions.and,
          InstructionsTypeR.sr -> ALUFunctions.srl
        )
      ),
      InstructionsTypeRorRB.zexth -> ALUFunctions.zexth,
      InstructionsTypeRorRB.bset -> ALUFunctions.bset,
      InstructionsTypeRorRB.binv -> ALUFunctions.binv,
      InstructionsTypeRorRB.RB1 -> MuxLookup(
        io.funct3,
        ALUFunctions.zero,
        IndexedSeq(
          RB1.clmul -> ALUFunctions.clmul,
          RB1.clmulr -> ALUFunctions.clmulr,
          RB1.clmulh -> ALUFunctions.clmulh,
          RB1.min -> ALUFunctions.min,
          RB1.minu -> ALUFunctions.minu,
          RB1.max -> ALUFunctions.max,
          RB1.maxu -> ALUFunctions.maxu
        )
      ),
      InstructionsTypeRorRB.RB2 -> MuxLookup(
        io.funct3,
        ALUFunctions.zero,
        IndexedSeq(
          RB2.sh1add -> ALUFunctions.sh1add,
          RB2.sh2add -> ALUFunctions.sh2add,
          RB2.sh3add -> ALUFunctions.sh3add
        )
      ),
      // funct7 == 0100000: sub (funct3 000), sra (funct3 101) and the
      // negated logical operations.
      InstructionsTypeRorRB.sraorRB3 -> MuxLookup(
        io.funct3,
        ALUFunctions.zero,
        IndexedSeq(
          InstructionsTypeR.add_sub -> ALUFunctions.sub,
          RB3.sra -> ALUFunctions.sra,
          RB3.xnor -> ALUFunctions.xnor,
          RB3.orn -> ALUFunctions.orn,
          RB3.andn -> ALUFunctions.andn
        )
      ),
      // In both groups below funct3 is 001 or 101, so bit 2 alone tells the
      // two instructions apart: set -> bext / ror, clear -> bclr / rol.
      InstructionsTypeRorRB.RB4 -> Mux(io.funct3(2), ALUFunctions.bext, ALUFunctions.bclr),
      InstructionsTypeRorRB.RB5 -> Mux(io.funct3(2), ALUFunctions.ror, ALUFunctions.rol)
    )
  )
}
Output | sh1add | sh2add | sh3add |
---|---|---|---|
regs_reg1_read_address: | rs1 | rs1 | rs1 |
ex_aluop1_source: | 0 | 0 | 0 |
ex_aluop2_source: | 0 | 0 | 0 |
memory_read_enable: | 0 | 0 | 0 |
memory_write_enable: | 0 | 0 | 0 |
wb_reg_write_source: | 0 | 0 | 0 |
reg_write_enable: | 1 | 1 | 1 |
sh1add rd, rs1, rs2: X(rd) = X(rs2) + (X(rs1) << 1);
sh2add rd, rs1, rs2 X(rd) = X(rs2) + (X(rs1) << 2);
sh3add rd, rs1, rs2 X(rd) = X(rs2) + (X(rs1) << 3);
andn rd, rs1, rs2: X(rd) = X(rs1) & ~X(rs2);
orn rd, rs1, rs2: X(rd) = X(rs1) | ~X(rs2);
xnor rd, rs1, rs2: X(rd) = ~(X(rs1) ^ X(rs2));
clz rd, rs:
val HighestSetBit : forall ('N : Int), 'N >= 0. bits('N) -> int
function HighestSetBit x = {
foreach (i from (xlen - 1) to 0 by 1 in dec)
if [x[i]] == 0b1 then return(i) else ();
return -1;
}
let rs = X(rs);
X[rd] = (xlen - 1) - HighestSetBit(rs);
ctz rd, rs
val LowestSetBit : forall ('N : Int), 'N >= 0. bits('N) -> int
function LowestSetBit x = {
foreach (i from 0 to (xlen - 1) by 1 in inc)
if [x[i]] == 0b1 then return(i) else ();
return xlen;
}
let rs = X(rs);
X[rd] = LowestSetBit(rs);
cpop rd, rs
let bitcount = 0;
let rs = X(rs);
foreach (i from 0 to (xlen - 1) in inc)
if rs[i] == 0b1 then bitcount = bitcount + 1 else ();
X[rd] = bitcount
max rd, rs1, rs2
let rs1_val = X(rs1);
let rs2_val = X(rs2);
let result = if rs1_val <_s rs2_val
then rs2_val
else rs1_val;
X(rd) = result;
maxu rd, rs1, rs2
let rs1_val = X(rs1);
let rs2_val = X(rs2);
let result = if rs1_val <_u rs2_val
then rs2_val
else rs1_val;
X(rd) = result;
min rd, rs1, rs2
let rs1_val = X(rs1);
let rs2_val = X(rs2);
let result = if rs1_val <_s rs2_val
then rs1_val
else rs2_val;
X(rd) = result;
minu rd, rs1, rs2
let rs1_val = X(rs1);
let rs2_val = X(rs2);
let result = if rs1_val <_u rs2_val
then rs1_val
else rs2_val;
X(rd) = result;
sext.b rd, rs: X(rd) = EXTS(X(rs)[7..0]);
sext.h rd, rs: X(rd) = EXTS(X(rs)[15..0]);
zext.h rd, rs: X(rd) = EXTZ(X(rs)[15..0]);
rol rd, rs1, rs2
let shamt = if xlen == 32
then X(rs2)[4..0]
else X(rs2)[5..0];
let result = (X(rs1) << shamt) | (X(rs1) >> (xlen - shamt));
X(rd) = result;
ror rd, rs1, rs2
let shamt = if xlen == 32
then X(rs2)[4..0]
else X(rs2)[5..0];
let result = (X(rs1) >> shamt) | (X(rs1) << (xlen - shamt));
X(rd) = result;
rori rd, rs1, shamt
let shamt = if xlen == 32
then shamt[4..0]
else shamt[5..0];
let result = (X(rs1) >> shamt) | (X(rs1) << (xlen - shamt));
X(rd) = result;
orc.b rd, rs
let input = X(rs);
let output : xlenbits = 0;
let j = xlen;
foreach (i from 0 to (xlen - 8) by 8) {
output[(i + 7)..i] = if input[(i + 7)..i] == 0
then 0b00000000
else 0b11111111;
}
X[rd] = output;
rev8 rd, rs:
let input = X(rs);
let output : xlenbits = 0;
let j = xlen - 1;
foreach (i from 0 to (xlen - 8) by 8) {
output[i..(i + 7)] = input[(j - 7)..j];
j = j - 8;
}
X[rd] = output
clmul rd, rs1, rs2: clmul produces the lower half of the 2·XLEN carry-less product.
let rs1_val = X(rs1);
let rs2_val = X(rs2);
let output : xlenbits = 0;
foreach (i from 0 to (xlen - 1) by 1) {
output = if ((rs2_val >> i) & 1)
then output ^ (rs1_val << i);
else output;
}
X[rd] = output
clmulh rd, rs1, rs2: clmulh produces the upper half of the 2·XLEN carry-less product.
let rs1_val = X(rs1);
let rs2_val = X(rs2);
let output : xlenbits = 0;
foreach (i from 1 to (xlen - 1) by 1) {
output = if ((rs2_val >> i) & 1)
then output ^ (rs1_val >> (xlen - i));
else output;
}
X[rd] = output
clmulr rd, rs1, rs2: produces bits 2·XLEN−2:XLEN-1 of the 2·XLEN carry-less product.
Operation
let rs1_val = X(rs1);
let rs2_val = X(rs2);
let output : xlenbits = 0;
foreach (i from 0 to (xlen - 1) by 1) {
output = if ((rs2_val >> i) & 1)
then output ^ (rs1_val >> (xlen - i - 1));
else output;
}
X[rd] = output
bclr rd, rs1, rs2: This instruction returns rs1 with a single bit cleared at the index specified in rs2. The index is read from the lower log2(XLEN) bits of rs2.
let index = X(rs2) & (XLEN - 1);
X(rd) = X(rs1) & ~(1 << index)
bclri rd, rs1, shamt:
let index = shamt & (XLEN - 1);
X(rd) = X(rs1) & ~(1 << index)
bext rd, rs1, rs2 Single-Bit Extract (Register)
let index = X(rs2) & (XLEN - 1);
X(rd) = (X(rs1) >> index) & 1;
bexti rd, rs1, shamt: Single-Bit Extract (Immediate)
let index = shamt & (XLEN - 1);
X(rd) = (X(rs1) >> index) & 1;
binv rd, rs1, rs2: Single-Bit Invert (Register)
let index = X(rs2) & (XLEN - 1);
X(rd) = X(rs1) ^ (1 << index)
binvi rd, rs1, shamt:
let index = shamt & (XLEN - 1);
X(rd) = X(rs1) ^ (1 << index)
bset rd, rs1, rs2: Single-Bit Set (Register)
let index = X(rs2) & (XLEN - 1);
X(rd) = X(rs1) | (1 << index)
bseti rd, rs1, shamt:
let index = shamt & (XLEN - 1);
X(rd) = X(rs1) | (1 << index)
// Operation selector for the ALU: the base integer operations followed by the
// bit-manipulation operations, grouped by Zb* sub-extension.
// The declaration order determines the enum encoding; append new values rather
// than reordering.
object ALUFunctions extends ChiselEnum {
// Base integer operations
val zero, add, sub, sll, slt, xor, or, and, srl, sra, sltu = Value
// Zba
val sh1add, sh2add, sh3add = Value
// Zbb
val andn, orn, xnor,
clz, ctz, cpop,
max, maxu, min, minu,
sextb, sexth, zexth,
rol, ror, rori,
orcb, rev8 = Value
// Zbc
val clmul, clmulh, clmulr = Value
// Zbs
val bclr, bclri, bext, bexti, binv, binvi, bset, bseti = Value
}
e.g.
object B_Extension{
// Shifts A_in right by the run-time amount `bits` and returns the result as a UInt.
def ShiftRightB(A_in: UInt, bits: UInt): UInt = (A_in >> bits).asUInt
// Number of zero bits above the most significant set bit of A_in.
// Reversing the input lets PriorityEncoder (index of the lowest set bit) count
// from the MSB. An all-zero input is handled separately because
// PriorityEncoder has no defined result when no bit is set; clz must then
// return the full width of A_in (XLEN). A_in must have a known width.
def CountLeadingZeros(A_in: UInt): UInt =
  Mux(A_in.orR, PriorityEncoder(Reverse(A_in)), A_in.getWidth.U)
// ...
switch(io.func) {
// ...
// Zba
is(ALUFunctions.sh1add) {
io.result := B_Extension.ShiftLeftB(io.op1, 1.U)+ io.op2
}
// ...
// Zbb
is(ALUFunctions.clz) {
io.result := B_Extension.CountLeadingZeros(io.op1)
}
// ...
Test the execution of the pipeline and check whether adding the extension causes errors.
sbt test
chisel-tutorial
Macbook M1使用vscode+iverilog+gtkwave实现Verilog代码的编译与运行
如何在Mac OS X上安裝Verilog環境
riscv-mini
RV32I Instruction
Pseudocode for instruction semantics
Lab3: Construct a single-cycle RISC-V CPU with Chisel
"B" Extension for Bit Manipulation, Version 1.0.0