/////////////////////////////////////////////////////////////////////
// SPI FLASH Controller with CACHE for 32-bit cores (risc-v in mind)
// by Jesús Arias Alvarez (2022)
// Public domain, (bugs also ;)
/////////////////////////////////////////////////////////////////////
//
// Organisation, as implemented below:
//   * 8 cache lines, any line can hold any flash block (one tag
//     comparator per line).
//   * A line is 32 words of 32 bits (128 bytes): tag = addr[23:7],
//     word inside the line = addr[6:2].
//   * On a miss the whole line is read from the flash with the READ
//     command (0x03): 32 command/address clocks, then 1024 data clocks.
//   * The line to replace is chosen by one of three policies selected
//     with the `define below (SEQUENTIAL, RANDOM or LRU).
//
// NOTE: the data output port is called `do`, which is a reserved word
// in SystemVerilog. Compile this file as plain Verilog.
/////////////////////////////////////////////////////////////////////

module flashcache (
    input             clk,     // main clock
    input             flush,   // invalidate cache (async)
    input      [23:2] addr,    // Flash Address (24 bits, LSBs always 00)
    output reg [31:0] do,      // 32 bit data output (loaded on negedge clk)
    input             strobe,  // Active for reads
    output            sck,     // SPI clock
    output reg        sdo,     // SPI output (MOSI)
    input             sdi,     // SPI input  (MISO)
    output reg        cs,      // SPI slave select (Flash /CS), active low
    output            wai      // CPU must wait if 1
);

// Exactly one of these must be defined.
//`define SEQUENTIAL    // sequential cache line allocation
//`define RANDOM        // random cache line allocation
`define LRU             // Least Recently Used cache line allocation

//-------------------------------------------------------------------
// SPI reader state. Declared here, ahead of every use, so that tools
// which require declaration-before-use accept the file.
//-------------------------------------------------------------------
reg [10:0] bitcnt  = 0;  // Bit counter (bit 10 set: CMD phase / idle,
                         //              bit 10 clear: data phase)
reg [31:0] sh;           // Common shifter for input and output
reg        wrcache = 0;  // Write-to-cache pulse (every 32 SCK cycles)
reg [4:0]  wraddr;       // Word to write inside the cache line
reg        rsdi;         // Registered copy of SDI (MISO)
reg        lastwr  = 0;  // High for the cycle in which the last word
                         // of a line fill is being written to the cache

initial cs = 1'b1;       // Flash deselected at power-up

//-------------------------------------------------------------------
// TAG registers with valid flags
//-------------------------------------------------------------------
reg  [16:0] tag[0:7];
reg  [7:0]  tagvalid = 0;
wire [7:0]  match;
assign match[0] = (tag[0] == addr[23:7]) & tagvalid[0];
assign match[1] = (tag[1] == addr[23:7]) & tagvalid[1];
assign match[2] = (tag[2] == addr[23:7]) & tagvalid[2];
assign match[3] = (tag[3] == addr[23:7]) & tagvalid[3];
assign match[4] = (tag[4] == addr[23:7]) & tagvalid[4];
assign match[5] = (tag[5] == addr[23:7]) & tagvalid[5];
assign match[6] = (tag[6] == addr[23:7]) & tagvalid[6];
assign match[7] = (tag[7] == addr[23:7]) & tagvalid[7];

wire nomatch = strobe & (match == 0);   // Read request with no matching tag

// Index of the matching cache line (0 when line 0 or no line matches).
// A tag is only written on a miss, so at most one line matches.
wire [2:0] cline =
        (match[1] ? 3'b001 : 0) |
        (match[2] ? 3'b010 : 0) |
        (match[3] ? 3'b011 : 0) |
        (match[4] ? 3'b100 : 0) |
        (match[5] ? 3'b101 : 0) |
        (match[6] ? 3'b110 : 0) |
        (match[7] ? 3'b111 : 0);

// Make the CPU wait while no valid data is available from the cache:
//   nomatch : miss detected, line fill about to start
//   ~cs     : line fill in progress
//   lastwr  : word 31 of the line is written to the cache memory on the
//             negedge that follows the rising edge of cs, the same
//             negedge on which `do` is loaded. Without this extra wait
//             cycle a miss on word 31 would hand the previous contents
//             of that cache location to the CPU.
assign wai = nomatch | (~cs) | lastwr;

//-------------------------------------------------------------------
// tgix: Tag index for line allocation
//-------------------------------------------------------------------
`ifdef SEQUENTIAL
reg [2:0] tgix = 0;             // sequential allocation (advanced after each fill)
`endif

`ifdef RANDOM
reg  [6:0] lfsr = 7'h7F;        // Pseudorandom allocation (advanced after each fill)
wire [2:0] tgix = lfsr[2:0];
`endif

`ifdef LRU                      // Least recently used allocation
// lru0..lru7 hold the eight line numbers ordered by age of last use:
// lru0 is the least recently used line, lru7 the most recently used.
reg [2:0] lru0 = 0;
reg [2:0] lru1 = 1;
reg [2:0] lru2 = 2;
reg [2:0] lru3 = 3;
reg [2:0] lru4 = 4;
reg [2:0] lru5 = 5;
reg [2:0] lru6 = 6;
reg [2:0] lru7 = 7;

wire [7:0] lrm;                 // Position of the accessed line in the list
assign lrm[0] = (lru0 == cline);
assign lrm[1] = (lru1 == cline);
assign lrm[2] = (lru2 == cline);
assign lrm[3] = (lru3 == cline);
assign lrm[4] = (lru4 == cline);
assign lrm[5] = (lru5 == cline);
assign lrm[6] = (lru6 == cline);
assign lrm[7] = (lru7 == cline);

// Partial 3-bit shift register: shifts right from data #7 until match.
// On every hit the accessed line is removed from its position and
// re-inserted at lru7; the entries above it move down one place.
always @(posedge clk) if (strobe & (~nomatch)) begin
    if (lrm[0])                                              lru0 <= lru1;
    if (lrm[0]|lrm[1])                                       lru1 <= lru2;
    if (lrm[0]|lrm[1]|lrm[2])                                lru2 <= lru3;
    if (lrm[0]|lrm[1]|lrm[2]|lrm[3])                         lru3 <= lru4;
    if (lrm[0]|lrm[1]|lrm[2]|lrm[3]|lrm[4])                  lru4 <= lru5;
    if (lrm[0]|lrm[1]|lrm[2]|lrm[3]|lrm[4]|lrm[5])           lru5 <= lru6;
    if (lrm[0]|lrm[1]|lrm[2]|lrm[3]|lrm[4]|lrm[5]|lrm[6])    lru6 <= lru7;
    lru7 <= cline;
end

wire [2:0] tgix = lru0;
`endif

// Tag allocation: on a miss the selected line takes the new tag and is
// marked valid immediately, so `nomatch` lasts a single clock cycle and
// `cline` points to the line being filled for the rest of the transfer.
// `flush` clears all valid flags asynchronously.
always @(posedge clk or posedge flush)
    if (flush) tagvalid <= 0;
    else if (nomatch) begin
        tag[tgix]      <= addr[23:7];
        tagvalid[tgix] <= 1;
    end

//-------------------------------------------------------------------
// Cache memory (2 BRAMS according to the original author)
//-------------------------------------------------------------------
reg [31:0] cache[0:255];

// Cache write, with endian correction: the first byte received from
// the flash (sh[31:24]) is stored in the low byte of the word.
always @(negedge clk)
    if (wrcache) cache[{cline, wraddr}] <= {sh[7:0], sh[15:8], sh[23:16], sh[31:24]};

// Cache read
always @(negedge clk)
    if (strobe) do <= cache[{cline, addr[6:2]}];

//-------------------------------------------------------------------
// SPI flash reader
//-------------------------------------------------------------------

// A complete 32-bit word is in `sh` one clock after bitcnt[4:0]==31
// during the data phase; wrcache/wraddr are delayed accordingly.
always @(negedge clk) begin
    wrcache <= (~bitcnt[10]) & (bitcnt[4:0] == 31);
    wraddr  <= bitcnt[9:5];
end

assign sck = (~cs) & (~clk);    // SCK pulses (inverted clk while selected)

// SDO (MOSI) changes on falling edges of SCK (clk inverted).
// sh[30] is used because sdo is registered one clock after `sh` is
// loaded; the first bit sent is the 0 that sdo takes while cs is still
// high, which equals the MSB of the 0x03 command.
always @(posedge clk) sdo <= sh[30] & bitcnt[10] & (~cs);

// SDI (MISO) is sampled on rising edges of SCK; forced to 0 outside
// the data phase.
always @(negedge clk) rsdi <= sdi & (~bitcnt[10]);

// Statement order matters here: later non-blocking assignments to the
// same register override earlier ones within the same clock edge.
always @(posedge clk) begin
    if (nomatch) begin
        cs     <= 0;
        bitcnt <= 2016;                         // 32 pulses to data phase
        sh     <= {8'h03, addr[23:7], 7'h00};   // Command READ and 24-bit address
    end else sh <= {sh[30:0], rsdi};
    if (~cs) bitcnt <= bitcnt + 1;
    if (bitcnt == 1023) begin                   // last data bit of the line
        cs <= 1;
`ifdef SEQUENTIAL
        tgix <= tgix + 1;
`endif
`ifdef RANDOM
        lfsr <= {lfsr[5:0], lfsr[5] ^ lfsr[6]};
`endif
    end
end

// One-cycle flag covering the cache write of the last word of a line
// (see the `wai` assignment above).
always @(posedge clk) lastwr <= (bitcnt == 1023);

endmodule