Commit 6ada93d7 authored by jinbao chen's avatar jinbao chen
Browse files

finish bht&fix bugs

parent 4830cec5
This diff is collapsed.
......@@ -10,18 +10,18 @@
// Description: IF-ID Segment Register
//////////////////////////////////////////////////////////////////////////////////
//功能说明
//IDSegReg是IF-ID段寄存器,同时包含了一个同步读写的Bram(此处你可以调用我们提供的InstructionRam,
//它将会自动综合为block memory,你也可以替代性的调用xilinx的bram ip核)。
//同步读memory 相当于 异步读memory 的输出外接D触发器,需要时钟上升沿才能读取数据。
//此时如果再通过段寄存器缓存,那么需要两个时钟上升沿才能将数据传递到Ex段
//IDSegReg是IF-ID段寄存器,同时包含了一个同步读写的Bram(此处你可以调用我们提供的InstructionRam,
//它将会自动综合为block memory,你也可以替代性的调用xilinx的bram ip核)。
//同步读memory 相当于 异步读memory 的输出外接D触发器,需要时钟上升沿才能读取数据。
//此时如果再通过段寄存器缓存,那么需要两个时钟上升沿才能将数据传递到Ex段
//因此在段寄存器模块中调用该同步memory,直接将输出传递到ID段组合逻辑
//调用mem模块后输出为RD_raw,通过assign RD = stall_ff ? RD_old : (clear_ff ? 32'b0 : RD_raw );
//从而实现RD段寄存器stall和clear功能
//实验要求
//补全IDSegReg模块,需补全的片段截取如下
//补全IDSegReg模块,需补全的片段截取如下
//InstructionRam InstructionRamInst (
// .clk (), //请完善代码
// .addra (), //请完善代码
// .clk (), //请完善代码
// .addra (), //请完善代码
// .douta ( RD_raw ),
// .web ( |WE2 ),
// .addrb ( A2[31:2] ),
......@@ -55,8 +55,8 @@ module IDSegReg(
wire [31:0] RD_raw;
/* InstructionRam InstructionRamInst (
.clk (clk), //请完善代?
.addra (A[31:2]), //请完善代?
.clk (clk), //请完善代??
.addra (A[31:2]), //请完善代??
.douta ( RD_raw ),
.web ( |WE2 ),
.addrb ( A2[31:2] ),
......@@ -64,7 +64,7 @@ module IDSegReg(
.doutb ( RD2 )
); */
InstructionCache InstCacheInst (
InstructionCacheMM InstCacheInst (
.clk(clk),
.write_en(|WE2),
.addr(A[31:2]),
......
......@@ -136,6 +136,7 @@ module RV32Core(
// ---------------------------------------------
wire BTB_PREFAIL;
wire BTB_FILL;
wire [31:0] BHT_PCE;
NPC_Generator NPC_Generator1(
.PCF(PCF),
......@@ -148,7 +149,7 @@ module RV32Core(
.PC_In(PC_In),
.PCF_SEL(PCF_SEL),
.PCF_PRE(PCF_PRE),
.PCE(PCE),
.PCE(BHT_PCE),
.BTB_FILL(BTB_FILL),
.BTB_PREFAIL(BTB_PREFAIL)
);
......@@ -161,20 +162,60 @@ module RV32Core(
.PCF(PCF)
);
BTB BTB1(
/* BTB BTB1(
.clk(CPU_CLK),
.rst(CPU_RST),
.PCF(PCF),
.PCE(PCE),
// .PCE(PCE),
.BranchTypeE(BranchTypeE),
.BranchE(BranchE),
.BranchTarget(BrNPC),
.Stall(StallF|StallD|StallE),
.Flush(FlushD|FlushE),
.PC_PRE(PCF_PRE),
.PC_SEL(PCF_SEL),
.btb_flush(BTB_FLUSH),
.btb_prefail(BTB_PREFAIL),
.btb_fill(BTB_FILL),
.PCE(BHT_PCE)
); */
BHT BHT1(
.clk(CPU_CLK),
.rst(CPU_RST),
.PCF(PCF),
//.PCE(PCE),
.BranchTypeE(BranchTypeE),
.BranchE(BranchE),
.BranchTarget(BrNPC),
.Stall(StallF|StallD|StallE),
.Flush(FlushD|FlushE),
.PC_PRE(PCF_PRE),
.PC_SEL(PCF_SEL),
.btb_flush(BTB_FLUSH),
.btb_prefail(BTB_PREFAIL),
.btb_fill(BTB_FILL)
.btb_fill(BTB_FILL),
.PCE(BHT_PCE)
);
/* PRED PRED1(
.clk(CPU_CLK),
.rst(CPU_RST),
.PCF(PCF),
//.PCE(PCE),
.BranchTypeE(BranchTypeE),
.BranchE(BranchE),
.BranchTarget(BrNPC),
.Stall(StallF|StallD|StallE),
.Flush(FlushD|FlushE),
.PC_PRE(PCF_PRE),
.PC_SEL(PCF_SEL),
.flush(BTB_FLUSH),
.prefail(BTB_PREFAIL),
.fill(BTB_FILL),
.PCE(BHT_PCE)
); */
// ---------------------------------------------
// ID stage
// ---------------------------------------------
......
......@@ -98,7 +98,7 @@ module WBSegReg(
end
wire [31:0] RD_raw;
/* DataRam DataRamInst (
/* DataRam DataCacheInst (
.clk (clk), //请完善代�?
.wea (WE << A[1:0]), //请完善代�?
.addra (A[31:2]), //请完善代�?
......
// memQS16: single-address word memory with a registered read port.
//
// Parameters
//   ADDR_LEN : width of the word address; the array holds 2^ADDR_LEN words.
//
// Ports
//   clk, rst : clock and asynchronous active-high reset (reset clears only
//              the read register, never the array contents)
//   addr     : word address shared by the read and the write path
//   rd_data  : word read from ram_cell[addr], registered on the rising edge
//   wr_req   : when high, wr_data is stored at addr on the rising edge
//   wr_data  : word to store
//
// Only words 0..15 receive an initial value below; every other word keeps
// the simulator's default until it is written.
module memQS16 #(
    parameter ADDR_LEN = 11
) (
    input                     clk, rst,
    input      [ADDR_LEN-1:0] addr,
    output reg [31:0]         rd_data,
    input                     wr_req,
    input      [31:0]         wr_data
);

localparam MEM_SIZE = 1 << ADDR_LEN;

reg [31:0] ram_cell [0:MEM_SIZE-1];

// Write port: synchronous, gated by wr_req.
always @ (posedge clk) begin
    if (wr_req) begin
        ram_cell[addr] <= wr_data;
    end
end

// Read port: output register with asynchronous clear. A read that shares a
// clock edge with a write to the same address samples the array before the
// non-blocking write lands, i.e. it returns the previous contents.
always @ (posedge clk or posedge rst) begin
    if (rst) begin
        rd_data <= 0;
    end else begin
        rd_data <= ram_cell[addr];
    end
end

// Preloaded data set: the first sixteen words hold the values 0..15 in a
// fixed shuffled order.
initial begin
    ram_cell[0]  = 32'd4;
    ram_cell[1]  = 32'd13;
    ram_cell[2]  = 32'd1;
    ram_cell[3]  = 32'd14;
    ram_cell[4]  = 32'd6;
    ram_cell[5]  = 32'd5;
    ram_cell[6]  = 32'd12;
    ram_cell[7]  = 32'd11;
    ram_cell[8]  = 32'd15;
    ram_cell[9]  = 32'd10;
    ram_cell[10] = 32'd3;
    ram_cell[11] = 32'd0;
    ram_cell[12] = 32'd2;
    ram_cell[13] = 32'd8;
    ram_cell[14] = 32'd7;
    ram_cell[15] = 32'd9;
end

endmodule
......@@ -6,60 +6,60 @@
.org 0x0
.global _start
_start:
xori a4, zero, 4 # a4寄存器决定了计算的规模,矩阵规模=N*NN=2^a4。例如a4=4,则矩阵为 2^4=16阶方阵。该值可以修改。当然,矩阵规模变化后,DataRam的内存分配方式也要同步的变化,才能运行出正确结果
xori a4, zero, 4 #0 # a4寄存器决定了计算的规模,矩阵规模=N*NN=2^a4。例如a4=4,则矩阵为 2^4=16阶方阵。该值可以修改。当然,矩阵规模变化后,DataRam的内存分配方式也要同步的变化,才能运行出正确结果
# 以下指令计算3个矩阵(目的矩阵,源矩阵1,源矩阵2)在内存中的起始地址。
# 这三个矩阵在内存中顺序而紧挨着存放,例如 a4=4,则N=16,则每个矩阵占N*N=256个字,即1024个字节
# 目的矩阵起始地址为0 源矩阵1起始地址为1024 源矩阵2起始地址为2048
# 目的矩阵起始地址放在a2里,源矩阵1起始地址放在a0里,源矩阵2起始地址放在a1
xori a3, zero, 4
sll a3, a3 , a4
xor a2, zero, zero
sll a0, a3 , a4
add a1, a0 , a0
xori a3, zero, 4#4
sll a3, a3 , a4#8
xor a2, zero, zero#12
sll a0, a3 , a4#16
add a1, a0 , a0#20
# 开始矩阵乘法,使用伪矩阵乘法公式:c_{ij} = \sum_k a_{ik}*b_{kj} 循环嵌套顺序(从外向内)为 i,j,k 分别使用 t0,t1,t2 存放 i,j,k
xor t0, zero, zero
xor t0, zero, zero#24
MatMulLoopI:
xor t1, zero, zero
xor t1, zero, zero#28
MatMulLoopJ:
xor t3, zero, zero #t3存放最内求和循环的累加和,首先将t3清零
xor t2, zero, zero
xor t3, zero, zero #32 #t3存放最内求和循环的累加和,首先将t3清零
xor t2, zero, zero #36
MatMulLoopK:
sll t4, t0, a4
add t4, t4, t2
add t4, t4, a0
lw t4, (t4)
sll t5, t2, a4
add t5, t5, t1
add t5, t5, a1
lw t5, (t5)
and t4, t4, t5
add t3, t3, t4
addi t2, t2, 4
blt t2, a3, MatMulLoopK
sll t4, t0, a4
add t4, t4, t1
add t4, t4, a2
sw t3, (t4)
addi t1, t1, 4
blt t1, a3, MatMulLoopJ
addi t0, t0, 4
blt t0, a3, MatMulLoopI
sll t4, t0, a4 #40
add t4, t4, t2 #44
add t4, t4, a0 #48
lw t4, (t4) #52
sll t5, t2, a4 #56
add t5, t5, t1 #60
add t5, t5, a1 #64
lw t5, (t5) #68
and t4, t4, t5 #72
add t3, t3, t4 #76
addi t2, t2, 4 #80
blt t2, a3, MatMulLoopK #84
sll t4, t0, a4 #88
add t4, t4, t1 #92
add t4, t4, a2 #96
sw t3, (t4) #100
addi t1, t1, 4 #104
blt t1, a3, MatMulLoopJ#108
addi t0, t0, 4#112
blt t0, a3, MatMulLoopI#116
# 计算结束,死循环
# 计算结束,死循环
add t0, zero, 1
sll t0, t0, a4
sll t0, t0, a4
slli t0, t0, 2
addi t1, zero, 0
add t0, zero, 1#120
sll t0, t0, a4#124
sll t0, t0, a4#128
slli t0, t0, 2#132
addi t1, zero, 0#136
Loop:
lw t2, (t1)
addi t1, t1, 4
blt t1, t0, Loop
lw t2, (t1)#140
addi t1, t1, 4#144
blt t1, t0, Loop#148
InfLoop:
jal zero, InfLoop
jal zero, InfLoop#152
\ No newline at end of file
......@@ -4,7 +4,7 @@ module cache_fifo #(
parameter LINE_ADDR_LEN = 3, // line内地??????长度,决定了每个line具有2^3个word
parameter SET_ADDR_LEN = 3, // 组地??????长度,决定了??????共有2^3=8??????
parameter TAG_ADDR_LEN = 6, // tag长度
parameter WAY_CNT = 3 // 组相连度,决定了每组中有多少路line,这里是直接映射型cache,因此该参数没用??????
parameter WAY_CNT = 1 // 组相连度,决定了每组中有多少路line,这里是直接映射型cache,因此该参数没用??????
)(
input clk, rst,
output miss, // 对CPU发出的miss信号
......
......@@ -21,7 +21,7 @@ reg [(ADDR_LEN + LINE_ADDR_LEN) - 1 : 0] mem_addr = 0;
reg [31:0] mem_wr_data = 0;
wire [31:0] mem_rd_data;
mem #(
memMM #(
.ADDR_LEN ( ADDR_LEN + LINE_ADDR_LEN )
) mem_inst (
.clk ( clk ),
......
`timescale 1ns / 1ps
// BHT: direct-mapped branch target table with a 2-bit history counter per
// entry.
//
// Lookup (combinational, indexed by PCF):
//   PC_PRE = stored target of the indexed entry
//   PC_SEL = tag match AND counter MSB set (i.e. "predict taken")
//
// The hit flag and counter value seen at lookup are carried through two
// pipeline registers (ID, EX) together with the PC, so that the entry can be
// checked and updated when the instruction reaches EX.
//
// Parameters
//   SET_ADDR_LEN : number of low PCF bits used as the table index; the table
//                  has 2^SET_ADDR_LEN entries and the remaining upper bits
//                  form the tag.
//                  NOTE(review): the index is PCF[SET_ADDR_LEN-1:0], which
//                  includes PCF[1:0]; if PCs are always word-aligned only a
//                  quarter of the entries are reachable - confirm intent.
//
// Ports
//   clk, rst     : clock / asynchronous active-high reset
//   PCF          : PC used for the lookup
//   BranchTypeE  : any non-zero value allows a new entry to be allocated for
//                  the instruction in EX
//   BranchE      : treated as "branch in EX is taken"
//                  (presumably driven by the branch decision unit - confirm)
//   BranchTarget : target address stored in the table
//   Stall        : freezes the tracking registers and the table
//   Flush        : clears the hit/counter values entering ID and EX
//   PC_PRE       : predicted target for PCF
//   PC_SEL       : 1 when PC_PRE should be used
//   btb_prefail  : predicted taken (hit, counter MSB = 1) but BranchE is low
//   btb_fill     : BranchE is high but the lookup did not predict taken
//   btb_flush    : btb_prefail | btb_fill
//   PCE          : PCF delayed by two unstalled clock edges
module BHT #(
    parameter SET_ADDR_LEN = 6
)(
    input  wire        clk,
    input  wire        rst,
    input  wire [31:0] PCF,
    //input wire [31:0] PCE,
    input  wire [2:0]  BranchTypeE,
    input  wire        BranchE,
    input  wire [31:0] BranchTarget,
    input  wire        Stall,
    input  wire        Flush,
    output wire [31:0] PC_PRE,
    output wire        PC_SEL,
    output wire        btb_flush,
    output wire        btb_prefail,
    output wire        btb_fill,
    output reg  [31:0] PCE
);

localparam TAG_ADDR_LEN = 32 - SET_ADDR_LEN;
localparam SET_SIZE     = 1 << SET_ADDR_LEN;

// Table storage: tag, predicted target and 2-bit counter per entry.
reg [TAG_ADDR_LEN-1:0] btb_tags [SET_SIZE];
reg [31:0]             btb_pred [SET_SIZE];
reg [1:0]              btb_stat [SET_SIZE];

wire [TAG_ADDR_LEN-1:0] pcf_tag;
wire [TAG_ADDR_LEN-1:0] pce_tag;
wire [SET_ADDR_LEN-1:0] pcf_set;
wire [SET_ADDR_LEN-1:0] pce_set;

reg [31:0] PCD;

// Split both PCs into {tag, index}.
assign {pce_tag, pce_set} = PCE;
assign {pcf_tag, pcf_set} = PCF;

// ---------------------------------------------------------------------------
// Lookup in IF
// ---------------------------------------------------------------------------
wire       btb_hit;
wire [1:0] IFstat;
reg  [1:0] IDstat;
reg        IDhit;
reg  [1:0] EXstat;
reg        EXhit;

assign PC_PRE  = btb_pred[pcf_set];
assign IFstat  = btb_stat[pcf_set];
// There is no valid bit: an entry matches whenever its stored tag equals the
// PC tag, including the all-zero tag left behind by reset.
assign btb_hit = (btb_tags[pcf_set] == pcf_tag);
assign PC_SEL  = btb_hit && IFstat[1];

// ---------------------------------------------------------------------------
// Carry the lookup result and the PC along with the instruction to EX.
// ---------------------------------------------------------------------------
always @(posedge clk or posedge rst) begin
    if (rst) begin
        IDstat <= 0;
        IDhit  <= 0;
        EXstat <= 0;
        EXhit  <= 0;
        PCD    <= 0;
        PCE    <= 0;
    end else if (!Stall) begin
        IDstat <= Flush ? 0 : IFstat;
        IDhit  <= Flush ? 0 : btb_hit;
        EXstat <= Flush ? 0 : IDstat;
        EXhit  <= Flush ? 0 : IDhit;
        PCD    <= PCF;
        PCE    <= PCD;
    end
end

// ---------------------------------------------------------------------------
// Prediction check in EX
// ---------------------------------------------------------------------------
assign btb_prefail = EXstat[1] && (!BranchE) && EXhit;
assign btb_fill    = ((!EXstat[1]) && BranchE && EXhit) || (!EXhit && BranchE);
assign btb_flush   = btb_prefail | btb_fill;

// ---------------------------------------------------------------------------
// Counter next-state logic (purely combinational, hence blocking assignments)
//
//   taken     : 00 -> 01, 01 -> 10, 10 -> 11, 11 -> 11
//   not taken : 11 -> 10, 10 -> 00, 01 -> 00, 00 -> 00
//   new entry : 01 if taken, 00 otherwise
// ---------------------------------------------------------------------------
reg [1:0] next_stat;
reg [1:0] init_stat;

always @(*) begin
    if (BranchE) begin
        init_stat = 2'b01;
        if (EXstat[1]) begin
            next_stat = 2'b11;
        end else if (EXstat[0]) begin
            next_stat = 2'b10;
        end else begin
            next_stat = 2'b01;
        end
    end else begin
        init_stat = 2'b00;
        if (EXstat[1]) begin
            // 2-bit shift: 11 -> 10, 10 -> 00
            next_stat = EXstat << 1;
        end else begin
            next_stat = 2'b00;
        end
    end
end

// ---------------------------------------------------------------------------
// Table update. Runs on the falling clock edge, i.e. half a cycle after the
// tracking registers above change on the rising edge.
// ---------------------------------------------------------------------------
always @(negedge clk or posedge rst) begin
    if (rst) begin
        for (integer i = 0; i < SET_SIZE; i++) begin
            btb_tags[i] <= 0;
            btb_pred[i] <= 0;
            btb_stat[i] <= 0;
        end
    end else if (!Stall) begin
        if (EXhit) begin
            btb_stat[pce_set] <= next_stat;
            // Refresh the target whenever the matching branch is taken.
            // Without this, an entry that only "hits" because its tag still
            // holds the reset value 0 would train its counter up to
            // predict-taken while keeping target 0, and neither btb_prefail
            // nor btb_fill would flag that prediction.
            if (BranchE) begin
                btb_pred[pce_set] <= BranchTarget;
            end
        end else if (|BranchTypeE) begin
            // Miss on a branch-type instruction: allocate / replace the entry.
            btb_tags[pce_set] <= pce_tag;
            btb_pred[pce_set] <= BranchTarget;
            btb_stat[pce_set] <= init_stat;
        end
    end
end

endmodule
\ No newline at end of file
......@@ -5,15 +5,18 @@ module BTB #(
input wire clk,
input wire rst,
input wire [31:0] PCF,
input wire [31:0] PCE,
input wire BranchE,
//input wire [31:0] PCE,
input wire [2:0] BranchTypeE,
input wire BranchE,//FIXME:BRANCH TYPE
input wire [31:0] BranchTarget,
input wire Stall,
input wire Flush,
output wire [31:0] PC_PRE,
output wire PC_SEL,
output wire btb_flush,
output wire btb_prefail,
output wire btb_fill
output wire btb_fill,
output reg [31:0] PCE
);
localparam TAG_ADDR_LEN = 32 - SET_ADDR_LEN;
......@@ -21,58 +24,79 @@ localparam SET_SIZE = 1 << SET_ADDR_LEN;
reg [TAG_ADDR_LEN-1:0] btb_tags [SET_SIZE];
reg [31:0] btb_pred [SET_SIZE];
reg btb_valid [SET_SIZE];
reg btb_stat [SET_SIZE];
wire [TAG_ADDR_LEN-1:0] pcf_tag;
wire [TAG_ADDR_LEN-1:0] pce_tag;
wire [SET_ADDR_LEN-1:0] pcf_set;
wire [SET_ADDR_LEN-1:0] pce_set;
reg [31:0] PCD;
assign {pce_tag,pce_set} = PCE;
assign {pcf_tag,pcf_set} = PCF;
assign PC_PRE = btb_pred[pcf_set];
wire btb_hit;
assign PC_SEL = btb_hit;
wire IFstat;
reg IDstat;
reg EXstat;
reg IDhit;
reg EXhit;
wire IFvalid;
reg IDvalid;
reg EXvalid;
assign PC_SEL = btb_hit&&IFstat;
assign IFvalid = btb_valid[pcf_set];
assign btb_hit = (IFvalid && (btb_tags[pcf_set] == pcf_tag));
assign IFstat = btb_stat[pcf_set];
assign btb_hit = (btb_tags[pcf_set] == pcf_tag);
always@(posedge clk or posedge rst)begin
if(rst)begin
IDvalid <= 0;
EXvalid <= 0;
IDstat <= 0;
IDhit <= 0;
EXstat <= 0;
EXhit <= 0;
PCD <= 0;
PCE <= 0;
end else if(!Stall)begin
IDvalid <= btb_hit;
EXvalid <= IDvalid;
IDstat <= Flush?0:IFstat;
IDhit <= Flush?0:btb_hit;
EXstat <= Flush?0:IDstat;
EXhit <= Flush?0:IDhit;
PCD <= PCF;
PCE <= PCD;
end
end
assign btb_prefail = EXvalid && (!BranchE);
assign btb_fill = (!EXvalid) && BranchE;
assign btb_prefail = EXstat && (!BranchE) &&EXhit;
assign btb_fill = ((!EXstat) && BranchE && EXhit) || (!EXhit && BranchE);
assign btb_flush = btb_prefail | btb_fill;
always @(posedge clk or posedge rst)begin
wire next_stat;
wire init_stat;
assign next_stat = BranchE;
assign init_stat = BranchE;
always @(negedge clk or posedge rst)begin
if(rst)begin
for(integer i = 0;i < SET_SIZE;i++)begin
btb_tags[i] <= 0;
btb_pred[i] <= 0;
btb_valid[i] <= 0;
btb_stat[i] <= 0;
end
end
else if(!Stall)begin
if(btb_prefail)begin//TODO:finish this
btb_valid[pce_set] <= 0;
end else if(btb_fill) begin
btb_valid[pce_set] <= 1;
btb_tags[pce_set] <= pce_tag;
btb_pred[pce_set] <= BranchTarget;
if(EXhit)begin//TODO:finish this
btb_stat[pce_set] <= next_stat;
end else begin
if(|BranchTypeE)begin
btb_tags[pce_set] <= pce_tag;
btb_pred[pce_set] <= BranchTarget;
btb_stat[pce_set] <= init_stat;
end
end
end
end
......
`timescale 1ns / 1ps
module PRED #(
parameter SET_ADDR_LEN = 6
)(
input wire clk,
input wire rst,
input wire [31:0] PCF,
//input wire [31:0] PCE,
input wire [2:0] BranchTypeE,
input wire BranchE,
input wire [31:0] BranchTarget,
input wire Stall,
input wire Flush,
output wire [31:0] PC_PRE,
output wire PC_SEL,
output wire flush,
output wire prefail,
output wire fill,
output wire [31:0] PCE
);
wire b_flush;
wire b_prefail;
wire b_fill;
wire b_pcsel;
wire [31:0] b_pcpre;
BTB #(SET_ADDR_LEN)BTB1(
.clk(clk),
.rst(rst),
.PCF(PCF),
.BranchE(BranchE),
.BranchTypeE(BranchTypeE),
.BranchTarget(BranchTarget),
.Stall(Stall),//TODO:UPDATE
.Flush(Flush),
.PC_PRE(b_pcpre),
.PC_SEL(b_pcsel),
.btb_flush(b_flush),
.btb_prefail(b_prefail),
.btb_fill(b_fill),
.PCE(PCE)
);
localparam TAG_ADDR_LEN = 32 - SET_ADDR_LEN;
localparam SET_SIZE = 1 << SET_ADDR_LEN;
reg [TAG_ADDR_LEN-1:0] bht_tags [SET_SIZE];
//reg [31:0] bht_pred [SET_SIZE];
reg [1:0] bht_stat [SET_SIZE];
wire [TAG_ADDR_LEN-1:0] pcf_tag;
wire [TAG_ADDR_LEN-1:0] pce_tag;
wire [SET_ADDR_LEN-1:0] pcf_set;
wire [SET_ADDR_LEN-1:0] pce_set;
assign {pce_tag,pce_set} = PCE;
assign {pcf_tag,pcf_set} = PCF;