diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..307c474 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "riscv-tests"] + path = riscv-tests + url = git@github.com:riscv/riscv-tests.git diff --git a/README.md b/README.md new file mode 100644 index 0000000..c95efec --- /dev/null +++ b/README.md @@ -0,0 +1,19 @@ +# runt + +A small, underperforming RV32I core, written in nMigen. + +I want to get it to the point where I could actually use it in a design, but I doubt it will have +any practical use, especially when compared to a VexRiscv or picorv32 core. + +This is my first CPU, and I also somewhat intentionally didn't look at any resources, +so it will likely be full of questionable design decisions that anyone with any experience +would have avoided. + +## Milestones + +- [ ] Full RV32I implementation, able to run code in simulation (not verified working correctly at + this point) +- [ ] Full coverage with riscv-tests, verified working. +- [ ] Integrated with some hardware to make an """SoC""", running on my Arty A7 +- [ ] Full coverage with riscv-formal +- [ ] (Maybe...) work on improving performance, add caching/prefetch, pipeline it, etc. 
diff --git a/core.py b/core.py index 1376b7a..ed6a0e8 100644 --- a/core.py +++ b/core.py @@ -1,4 +1,5 @@ from nmigen import * +from nmigen.hdl.rec import Layout, Record from enum import Enum @@ -7,27 +8,58 @@ from instruction_decoder import InstructionDecoder from regfile import RegisterFile # TODO replace this with a proper wishbone bus -MemoryBus = Record([ - ("rw", 1), - ("addr", 32), - ("data", 32), - ("i_valid", 1), - ("o_ready", 1), -]) +class DumbMemoryBus(Record): + def __init__(self): + super().__init__(Layout([ + ("rw", 1), + ("addr", unsigned(32)), + ("data", unsigned(32)), + ("i_valid", 1), + ("o_ready", 1), + ])) class IntImmediate(Enum): - ADDI = 0b000 - SLTI = 0b010 + # Integer operations on immediates, as defined by funct3 field + ADDI = 0b000 + SLTI = 0b010 SLTIU = 0b011 - XORI = 0b100 - ORI = 0b110 - ANDI = 0b111 + XORI = 0b100 + ORI = 0b110 + ANDI = 0b111 + + SLLI = 0b001 + SRxI = 0b101 + +class IntRegReg(Enum): + # Integer register-register operations, as defined by funct3 field + ADD = 0b000 # Also SUB + SLL = 0b001 + SLT = 0b010 + SLTU = 0b011 + XOR = 0b100 + SRx = 0b101 # SRL/SRA + OR = 0b110 + AND = 0b111 + +class BranchCondition(Enum): + # Different BRANCH conditions, as defined by funct3 field + BEQ = 0b000 + BNE = 0b001 + BLT = 0b100 + BGE = 0b101 + BLTU = 0b110 + BGEU = 0b111 + +class LSWidth(Enum): + B = 0b00 + H = 0b01 + W = 0b10 class RV32ICore(Elaboratable): """Basic RV32-I core.""" def __init__(self): - self.mem = MemoryBus + self.mem = DumbMemoryBus() self.decoder = InstructionDecoder() def elaborate(self, platform): @@ -39,11 +71,9 @@ class RV32ICore(Elaboratable): pc = Signal(unsigned(32)) instr = Signal(unsigned(32)) # Internal reg to hold instrunction data - r = Array([Signal(32) for _ in range(32)]) decoder_ports = self.decoder.ports() - funct = decoder_ports[1] - funct = decoder_ports[2] + funct3 = decoder_ports[2] imm = decoder_ports[3] immu = decoder_ports[4] src = decoder_ports[5] @@ -52,6 +82,13 @@ class 
RV32ICore(Elaboratable): m.d.comb += self.decoder.instr.eq(instr) m.d.sync += regfile.wen.eq(0) + m.d.sync += self.mem.i_valid.eq(0) + + + # Load/store variables + load_dest = Signal(unsigned(5)) + load_unsigned = Signal() + ls_width = Signal(LSWidth) with m.FSM(): with m.State("READ_PC"): @@ -69,32 +106,188 @@ class RV32ICore(Elaboratable): m.next = "DECODE" with m.State("DECODE"): + # TODO likely integer ops should be deferred and split into a separate ALU, + # where instead of doing everything here, I load all the values into the ALU + # and pull from the results. + + # TODO should I pull the src1/src2/dest assignments out to all the time? with m.Switch(self.decoder.opcode): with m.Case(Opcodes.OP_IMM): m.next = "READ_PC" - m.d.comb += regfile.raddr1.eq(imm) - m.d.comb += regfile.waddr.eq(imm) + m.d.comb += regfile.raddr1.eq(src) + m.d.comb += regfile.waddr.eq(dest) m.d.sync += regfile.wen.eq(1) - with m.Switch(funct): + with m.Switch(funct3): with m.Case(IntImmediate.ADDI): - m.d.sync += regfile.wdata.eq(regfile.raddr1 + imm) + m.d.sync += regfile.wdata.eq(regfile.rdata1 + imm) with m.Case(IntImmediate.SLTI): - m.d.sync += regfile.wdata.eq(regfile.raddr1 + imm) + m.d.sync += regfile.wdata.eq(regfile.rdata1 < imm) with m.Case(IntImmediate.SLTIU): - m.d.sync += regfile.wdata.eq(regfile.raddr1 + immu) + # TODO evaluate if this casts correctly and does what I want + m.d.sync += regfile.wdata.eq(regfile.rdata1.as_unsigned() < immu) with m.Case(IntImmediate.ANDI): - m.d.sync += regfile.wdata.eq(regfile.raddr1 & immu) + m.d.sync += regfile.wdata.eq(regfile.rdata1 & immu) with m.Case(IntImmediate.ORI): - m.d.sync += regfile.wdata.eq(regfile.raddr1 | immu) + m.d.sync += regfile.wdata.eq(regfile.rdata1 | immu) with m.Case(IntImmediate.XORI): - m.d.sync += regfile.wdata.eq(regfile.raddr1 ^ immu) + m.d.sync += regfile.wdata.eq(regfile.rdata1 ^ immu) + + with m.Case(IntImmediate.SLLI): + m.d.sync += regfile.wdata.eq(regfile.rdata1.as_unsigned() << immu[0:4]) + + with 
m.Case(IntImmediate.SRxI): + with m.If(immu[10]): + # SRAI + m.d.sync += regfile.wdata.eq(regfile.rdata1.as_unsigned() >> immu[0:4]) + + with m.Else(): + # SRLI + m.d.sync += regfile.wdata.eq(regfile.rdata1 >> immu[0:4]) + + with m.Case(Opcodes.LUI): + m.d.comb += regfile.waddr.eq(dest) + m.d.sync += regfile.wdata.eq(immu) + m.d.sync += regfile.wen.eq(1) + + with m.Case(Opcodes.AUIPC): + m.d.comb += regfile.waddr.eq(dest) + m.d.sync += regfile.wdata.eq(pc + immu) + + with m.Case(Opcodes.OP): + m.next = "READ_PC" + + m.d.comb += regfile.raddr1.eq(self.decoder.src1) + m.d.comb += regfile.raddr2.eq(self.decoder.src2) + m.d.comb += regfile.waddr.eq(self.decoder.dest) + m.d.sync += regfile.wen.eq(1) + + with m.Switch(self.decoder.funct7): + with m.Case(IntRegReg.ADD): + with m.If(self.decoder.funct7[5]): + # SUB + m.d.sync += regfile.wdata.eq(regfile.rdata1 - regfile.rdata2) + with m.Else(): + # ADD + m.d.sync += regfile.wdata.eq(regfile.rdata1 + regfile.rdata2) + with m.Case(IntRegReg.SLL): + m.d.sync += regfile.wdata.eq(regfile.rdata1 << regfile.rdata2[0:4]) + with m.Case(IntRegReg.SLT): + m.d.sync += regfile.wdata.eq(regfile.rdata1 < regfile.rdata2) + with m.Case(IntRegReg.SLTU): + rdata1 = regfile.rdata1.as_unsigned() + rdata2 = regfile.rdata2.as_unsigned() + m.d.sync += regfile.wdata.eq(rdata1 < rdata2) + with m.Case(IntRegReg.XOR): + m.d.sync += regfile.wdata.eq(regfile.rdata1 ^ regfile.rdata2) + with m.Case(IntRegReg.SRx): + with m.If(self.decoder.funct7[5]): + # SRA + m.d.sync += regfile.wdata.eq(regfile.rdata1 << regfile.rdata2[0:4]) + with m.Else(): + # SRL + m.d.sync += regfile.wdata.eq(regfile.rdata1.as_unsigned() << regfile.rdata2[0:4]) + with m.Case(IntRegReg.OR): + m.d.sync += regfile.wdata.eq(regfile.rdata1 | regfile.rdata2) + with m.Case(IntRegReg.AND): + m.d.sync += regfile.wdata.eq(regfile.rdata1 & regfile.rdata2) + + # TODO raise misalign exception + with m.Case(Opcodes.JAL): + m.d.comb += regfile.waddr.eq(self.decoder.dest) + m.d.sync += 
regfile.wdata.eq(pc + 4) + m.d.sync += regfile.wen.eq(1) + m.d.sync += pc.eq(pc + self.decoder.imm) + + # TODO raise misalign exception + with m.Case(Opcodes.JALR): + m.d.comb += regfile.waddr.eq(self.decoder.dest) + m.d.comb += regfile.raddr1.eq(self.decoder.src1) + m.d.sync += regfile.wdata.eq(pc + 4) + m.d.sync += regfile.wen.eq(1) + m.d.sync += pc.eq(regfile.rdata1 + self.decoder.imm) + + # TODO misalign exception + with m.Case(Opcodes.BRANCH): + m.d.comb += regfile.raddr1.eq(self.decoder.src1) + m.d.comb += regfile.raddr2.eq(self.decoder.src2) + branch_condition = Signal() + + with m.Switch(self.decoder.funct3): + with m.Case(BranchCondition.BEQ): + m.d.comb += branch_condition.eq(regfile.rdata1 == regfile.rdata2) + with m.Case(BranchCondition.BNE): + m.d.comb += branch_condition.eq(regfile.rdata1 != regfile.rdata2) + with m.Case(BranchCondition.BLT): + m.d.comb += branch_condition.eq(regfile.rdata1 < regfile.rdata2) + with m.Case(BranchCondition.BGE): + m.d.comb += branch_condition.eq(regfile.rdata1 >= regfile.rdata2) + with m.Case(BranchCondition.BLTU): + m.d.comb += branch_condition.eq(regfile.rdata1.as_unsigned() < regfile.rdata2.as_unsigned()) + with m.Case(BranchCondition.BGEU): + m.d.comb += branch_condition.eq(regfile.rdata1.as_unsigned() >= regfile.rdata2.as_unsigned()) + + with m.If(branch_condition): + m.d.sync += pc.eq(pc + self.decoder.imm) + + with m.Case(Opcodes.LOAD): + m.d.comb += regfile.raddr1.eq(self.decoder.src1) + m.d.sync += load_dest.eq(self.decoder.dest) + + with m.If(~self.mem.o_ready): + m.next = "LOAD" + m.d.sync += self.mem.rw.eq(0) + m.d.sync += self.mem.addr.eq(regfile.rdata1 + self.decoder.imm) + m.d.sync += self.mem.i_valid.eq(1) + m.d.sync += ls_width.eq(self.decoder.funct3[0:1]) + m.d.sync += load_unsigned.eq(self.decoder.funct3[2]) + + with m.Case(Opcodes.STORE): + m.d.comb += regfile.raddr1.eq(self.decoder.src1) + m.d.comb += regfile.raddr2.eq(self.decoder.base) + + with m.If(~self.mem.o_ready): + m.next = "STORE" + 
m.d.sync += self.mem.rw.eq(1) + m.d.sync += self.mem.addr.eq(regfile.rdata2 + self.decoder.imm) + m.d.sync += self.mem.data.eq(regfile.rdata1) + m.d.sync += self.mem.i_valid.eq(1) + m.d.sync += ls_width.eq(self.decoder.funct3[0:1]) + + with m.Case(Opcodes.MISC_MEM): + with m.If(self.decoder.funct3 == 0): + # TODO impl NOP + pass + with m.Else(): + # TODO raise invalid instruction + pass + + with m.Case(Opcodes.SYSTEM): + # TODO need to impl privileged stuff before I can do this + pass + + # TODO raise invalid instruction + with m.Default(): + pass + + + with m.State("LOAD"): + m.d.comb += regfile.waddr.eq(load_dest) + m.d.sync += regfile.wdata.eq(self.mem.data) + + with m.If(self.mem.o_ready): + m.d.sync += regfile.wen.eq(1) + m.next = "READ_PC" + + with m.State("STORE"): + # TODO this shouldn't be necessary, just temp until memory is handled properly + m.next = "READ_PC" return m diff --git a/instruction_decoder.py b/instruction_decoder.py index cf5f2ff..5f74df6 100644 --- a/instruction_decoder.py +++ b/instruction_decoder.py @@ -8,35 +8,71 @@ class InstructionDecoder(Elaboratable): #### Output self.opcode = Signal(Opcodes) - self.funct = Signal(unsigned(3)) + self.funct3 = Signal(unsigned(3)) + self.funct7 = Signal(unsigned(7)) self.imm = Signal(32) self.immu = Signal(unsigned(32)) # Register selection - self.src = Signal(unsigned(5)) + self.src1 = Signal(unsigned(5)) + self.src2 = Signal(unsigned(5)) self.dest = Signal(unsigned(5)) self.base = Signal(unsigned(5)) def ports(self) -> tuple: - return (self.instr, self.opcode, self.funct, self.imm, self.immu, self.src, self.dest, self.base) + return (self.instr, self.opcode, self.funct3, self.imm, self.immu, self.src1, self.dest, self.base) def elaborate(self, platform): m = Module() m.d.comb += self.opcode.eq(self.instr[0:6]) - - # TODO do actual sign extension + # Default immediate to 0 whenever we can + m.d.comb += self.imm.eq(0) + m.d.comb += self.immu.eq(0) + m.d.comb += self.dest.eq(self.instr[7:11]) # TODO 
will likely move back into the switch with m.Switch(self.opcode): - with m.Case(Opcodes.OP_IMM): - m.d.comb += self.imm[0:10].eq(self.instr[20:30]) - for i in range(11, 32): - m.d.comb += self.imm[i].eq(self.instr[31]) + # I-Type Instructions + with m.Case(Opcodes.OP_IMM, Opcodes.JALR, Opcodes.LOAD, Opcodes.MISC_MEM): + # TODO does this sign-extend? + m.d.comb += self.imm.eq(self.instr[20:31].as_signed()) + m.d.comb += self.immu.eq(self.instr[20:31]) - m.d.comb += self.immu[0:11].eq(self.instr[20:31]) - m.d.comb += self.immu[12:31].eq(0) + m.d.comb += self.funct3.eq(self.instr[12:14]) + m.d.comb += self.src1.eq(self.instr[15:19]) - m.d.comb += self.funct.eq(self.instr[12:14]) - m.d.comb += self.src.eq(self.instr[15:19]) - m.d.comb += self.dest.eq(self.instr[7:11]) + # U-Type Instructions + with m.Case(Opcodes.LUI, Opcodes.AUIPC): + m.d.comb += self.imm[12:31].eq(self.instr[12:31].as_signed()) + m.d.comb += self.immu[12:31].eq(self.instr[12:31]) + + # R-Type Instructions + with m.Case(Opcodes.OP): + m.d.comb += self.funct3.eq(self.instr[12:14]) + m.d.comb += self.funct7.eq(self.instr[25:31]) + m.d.comb += self.src1.eq(self.instr[15:19]) + m.d.comb += self.src2.eq(self.instr[20:24]) + + # J-Type Instructions + with m.Case(Opcodes.JAL): + imm = Cat(Const(0, shape=1), self.instr[21:30], self.instr[20], self.instr[12:19], self.instr[31]) + m.d.comb += self.imm.eq(imm.as_signed()) + m.d.comb += self.immu.eq(imm.as_unsigned()) + + # B-Type Instructions + with m.Case(Opcodes.BRANCH): + imm = Cat(Const(0, shape=1), self.instr[8:11], self.instr[25:30], self.instr[7], self.instr[31]) + m.d.comb += self.imm.eq(imm.as_signed()) + m.d.comb += self.immu.eq(imm.as_unsigned()) + m.d.comb += self.funct3.eq(self.instr[12:14]) + m.d.comb += self.src1.eq(self.instr[15:19]) + m.d.comb += self.src2.eq(self.instr[20:24]) + + # S-Type Instructions + with m.Case(Opcodes.STORE): + m.d.comb += self.funct3.eq(self.instr[12:14]) + m.d.comb += self.base.eq(self.instr[15:19]) + m.d.comb += 
self.src1.eq(self.instr[20:24]) + m.d.comb += self.imm.eq(Cat(self.instr[7:11], self.instr[25:31]).as_signed()) + m.d.comb += self.immu.eq(Cat(self.instr[7:11], self.instr[25:31]).as_unsigned()) return m diff --git a/riscv-tests b/riscv-tests new file mode 160000 index 0000000..09cfdaa --- /dev/null +++ b/riscv-tests @@ -0,0 +1 @@ +Subproject commit 09cfdaacd9322cf0ac94818d8c852e1f4dc5bc4f