The ability to easily access or modify the contents of a memory instantiated in an FPGA chip from a computer can be very helpful during verification or debugging. In the case of AMD/Xilinx chips and the Vivado IDE, this can be achieved using a combination of several IPs and Tcl commands.
Creating a memory with JTAG interface
In order to enable block memory access from Vivado, we first need a way to communicate with the FPGA logic from a computer using the Vivado IDE. Fortunately, we can use an IP block from AMD/Xilinx called “JTAG to AXI Master”. It allows us to create and execute AXI transactions from Vivado’s Tcl console. During generation, we can define the address and data bus widths as needed.
You may notice there is no memory block with an AXI interface available in the IP catalog. Does it mean that we need to create our own? Fortunately, we don’t. We can use another IP block called “AXI Block RAM (BRAM) Controller”. From our perspective it acts as a converter from AXI transactions to memory operations. We can define the data width and memory size, as well as the read and write latency. From my experience, the combination of the two mentioned IPs is more reliable when using the AXI4 interface rather than AXI4-Lite.
Lastly, we need to define a memory block itself. In this case I tend to instantiate another IP block for consistency. For this purpose we can use a Block Memory Generator IP. We need a dual-port RAM, with one port connected to the controller, and the second one to logic.
Below you can find a simplified block diagram of connected blocks as well as example Verilog code (which can be easily ported to VHDL).
// JTAG_RAM
//
// Dual-port block RAM whose port A can be read and written from the Vivado
// Tcl console, while port B is exposed to user logic.
//
// Data path:
//   jtag_axi_0 (JTAG to AXI Master, 64-bit AXI4)
//     -> axi_bram_ctrl_0 (AXI BRAM Controller, 17-bit byte address)
//       -> blk_mem_gen_0 port A (64-bit data, word addressed)
//   user logic <-> blk_mem_gen_0 port B (DATA_W-bit data)
//
// Parameters:
//   ADDR_W - width of the port B word address. It has to match the addrb
//            width of the generated blk_mem_gen_0 (12 bits according to the
//            instantiation template below).
//   DATA_W - width of the port B data bus. It has to match dinb/doutb of the
//            generated blk_mem_gen_0 (256 bits according to the template).
//
// Ports:
//   i_clk, i_rst_n      - clock and active-low reset shared by both AXI IPs;
//                         i_clk also clocks BRAM port B.
//   o_ram_rst?_busy     - rsta_busy / rstb_busy outputs of the BRAM.
//   i_ram_en            - port B enable.
//   i_ram_we            - port B byte write enable, one bit per data byte.
//   i_ram_addr          - port B word address.
//   i_ram_wrdata        - port B write data.
//   o_ram_rddata        - port B read data.
module JTAG_RAM #(
    // Was 4, which left addrb[11:4] undriven by user logic and limited
    // port B to the first 16 words of the memory.
    parameter ADDR_W = 12,
    parameter DATA_W = 256
)(
    input  wire                 i_clk,
    input  wire                 i_rst_n,
    output wire                 o_ram_rsta_busy,
    output wire                 o_ram_rstb_busy,
    input  wire                 i_ram_en,
    // One write-enable bit per byte (DATA_W/8). The previous DATA_W/4 width
    // was twice as wide as the BRAM web port, so the upper half was dropped.
    input  wire[DATA_W/8-1:0]   i_ram_we,
    input  wire[ADDR_W-1:0]     i_ram_addr,
    input  wire[DATA_W-1:0]     i_ram_wrdata,
    output wire[DATA_W-1:0]     o_ram_rddata
);

    // ------------------------------------------------------------------
    // AXI4 link between the JTAG master and the BRAM controller
    // ------------------------------------------------------------------
    // Write address channel
    wire[ 0:0] axi_awid;
    wire[31:0] axi_awaddr;
    wire[ 7:0] axi_awlen;
    wire[ 2:0] axi_awsize;
    wire[ 1:0] axi_awburst;
    wire       axi_awlock;
    wire[ 3:0] axi_awcache;
    wire[ 2:0] axi_awprot;
    wire[ 3:0] axi_awqos;   // driven by the master, not used by the controller
    wire       axi_awvalid;
    wire       axi_awready;
    // Write data channel
    wire[63:0] axi_wdata;
    wire[ 7:0] axi_wstrb;
    wire       axi_wlast;
    wire       axi_wvalid;
    wire       axi_wready;
    // Write response channel
    wire[ 0:0] axi_bid;
    wire[ 1:0] axi_bresp;
    wire       axi_bvalid;
    wire       axi_bready;
    // Read address channel
    wire[ 0:0] axi_arid;
    wire[31:0] axi_araddr;
    wire[ 7:0] axi_arlen;
    wire[ 2:0] axi_arsize;
    wire[ 1:0] axi_arburst;
    wire       axi_arlock;
    wire[ 3:0] axi_arcache;
    wire[ 2:0] axi_arprot;
    wire[ 3:0] axi_arqos;   // driven by the master, not used by the controller
    wire       axi_arvalid;
    wire       axi_arready;
    // Read data channel
    wire[ 0:0] axi_rid;
    wire[63:0] axi_rdata;
    wire[ 1:0] axi_rresp;
    wire       axi_rlast;
    wire       axi_rvalid;
    wire       axi_rready;

    // ------------------------------------------------------------------
    // Native BRAM interface between the controller and BRAM port A
    // ------------------------------------------------------------------
    wire       bram_rst_a;
    wire       bram_clk_a;
    wire       bram_en_a;
    wire[ 7:0] bram_we_a;
    wire[16:0] bram_addr_a;     // byte address
    wire[63:0] bram_wrdata_a;
    wire[63:0] bram_rddata_a;

    // JTAG to AXI Master: executes AXI transactions created in the Vivado
    // Tcl console.
    jtag_axi_0 JTAG_axi_inst (
        .aclk(i_clk),                       // input wire aclk
        .aresetn(i_rst_n),                  // input wire aresetn
        .m_axi_awid(axi_awid),              // output wire [0 : 0] m_axi_awid
        .m_axi_awaddr(axi_awaddr),          // output wire [31 : 0] m_axi_awaddr
        .m_axi_awlen(axi_awlen),            // output wire [7 : 0] m_axi_awlen
        .m_axi_awsize(axi_awsize),          // output wire [2 : 0] m_axi_awsize
        .m_axi_awburst(axi_awburst),        // output wire [1 : 0] m_axi_awburst
        .m_axi_awlock(axi_awlock),          // output wire m_axi_awlock
        .m_axi_awcache(axi_awcache),        // output wire [3 : 0] m_axi_awcache
        .m_axi_awprot(axi_awprot),          // output wire [2 : 0] m_axi_awprot
        .m_axi_awqos(axi_awqos),            // output wire [3 : 0] m_axi_awqos
        .m_axi_awvalid(axi_awvalid),        // output wire m_axi_awvalid
        .m_axi_awready(axi_awready),        // input wire m_axi_awready
        .m_axi_wdata(axi_wdata),            // output wire [63 : 0] m_axi_wdata
        .m_axi_wstrb(axi_wstrb),            // output wire [7 : 0] m_axi_wstrb
        .m_axi_wlast(axi_wlast),            // output wire m_axi_wlast
        .m_axi_wvalid(axi_wvalid),          // output wire m_axi_wvalid
        .m_axi_wready(axi_wready),          // input wire m_axi_wready
        .m_axi_bid(axi_bid),                // input wire [0 : 0] m_axi_bid
        .m_axi_bresp(axi_bresp),            // input wire [1 : 0] m_axi_bresp
        .m_axi_bvalid(axi_bvalid),          // input wire m_axi_bvalid
        .m_axi_bready(axi_bready),          // output wire m_axi_bready
        .m_axi_arid(axi_arid),              // output wire [0 : 0] m_axi_arid
        .m_axi_araddr(axi_araddr),          // output wire [31 : 0] m_axi_araddr
        .m_axi_arlen(axi_arlen),            // output wire [7 : 0] m_axi_arlen
        .m_axi_arsize(axi_arsize),          // output wire [2 : 0] m_axi_arsize
        .m_axi_arburst(axi_arburst),        // output wire [1 : 0] m_axi_arburst
        .m_axi_arlock(axi_arlock),          // output wire m_axi_arlock
        .m_axi_arcache(axi_arcache),        // output wire [3 : 0] m_axi_arcache
        .m_axi_arprot(axi_arprot),          // output wire [2 : 0] m_axi_arprot
        .m_axi_arqos(axi_arqos),            // output wire [3 : 0] m_axi_arqos
        .m_axi_arvalid(axi_arvalid),        // output wire m_axi_arvalid
        .m_axi_arready(axi_arready),        // input wire m_axi_arready
        .m_axi_rid(axi_rid),                // input wire [0 : 0] m_axi_rid
        .m_axi_rdata(axi_rdata),            // input wire [63 : 0] m_axi_rdata
        .m_axi_rresp(axi_rresp),            // input wire [1 : 0] m_axi_rresp
        .m_axi_rlast(axi_rlast),            // input wire m_axi_rlast
        .m_axi_rvalid(axi_rvalid),          // input wire m_axi_rvalid
        .m_axi_rready(axi_rready)           // output wire m_axi_rready
    );

    // AXI BRAM Controller: converts the AXI transactions into native BRAM
    // accesses. Only the low 17 bits of the 32-bit master address are used.
    axi_bram_ctrl_0 AXI_BRAM_ctrl (
        .s_axi_aclk(i_clk),                 // input wire s_axi_aclk
        .s_axi_aresetn(i_rst_n),            // input wire s_axi_aresetn
        .s_axi_awid(axi_awid),              // input wire [0 : 0] s_axi_awid
        .s_axi_awaddr(axi_awaddr[16:0]),    // input wire [16 : 0] s_axi_awaddr
        .s_axi_awlen(axi_awlen),            // input wire [7 : 0] s_axi_awlen
        .s_axi_awsize(axi_awsize),          // input wire [2 : 0] s_axi_awsize
        .s_axi_awburst(axi_awburst),        // input wire [1 : 0] s_axi_awburst
        .s_axi_awlock(axi_awlock),          // input wire s_axi_awlock
        .s_axi_awcache(axi_awcache),        // input wire [3 : 0] s_axi_awcache
        .s_axi_awprot(axi_awprot),          // input wire [2 : 0] s_axi_awprot
        .s_axi_awvalid(axi_awvalid),        // input wire s_axi_awvalid
        .s_axi_awready(axi_awready),        // output wire s_axi_awready
        .s_axi_wdata(axi_wdata),            // input wire [63 : 0] s_axi_wdata
        .s_axi_wstrb(axi_wstrb),            // input wire [7 : 0] s_axi_wstrb
        .s_axi_wlast(axi_wlast),            // input wire s_axi_wlast
        .s_axi_wvalid(axi_wvalid),          // input wire s_axi_wvalid
        .s_axi_wready(axi_wready),          // output wire s_axi_wready
        .s_axi_bid(axi_bid),                // output wire [0 : 0] s_axi_bid
        .s_axi_bresp(axi_bresp),            // output wire [1 : 0] s_axi_bresp
        .s_axi_bvalid(axi_bvalid),          // output wire s_axi_bvalid
        .s_axi_bready(axi_bready),          // input wire s_axi_bready
        .s_axi_arid(axi_arid),              // input wire [0 : 0] s_axi_arid
        .s_axi_araddr(axi_araddr[16:0]),    // input wire [16 : 0] s_axi_araddr
        .s_axi_arlen(axi_arlen),            // input wire [7 : 0] s_axi_arlen
        .s_axi_arsize(axi_arsize),          // input wire [2 : 0] s_axi_arsize
        .s_axi_arburst(axi_arburst),        // input wire [1 : 0] s_axi_arburst
        .s_axi_arlock(axi_arlock),          // input wire s_axi_arlock
        .s_axi_arcache(axi_arcache),        // input wire [3 : 0] s_axi_arcache
        .s_axi_arprot(axi_arprot),          // input wire [2 : 0] s_axi_arprot
        .s_axi_arvalid(axi_arvalid),        // input wire s_axi_arvalid
        .s_axi_arready(axi_arready),        // output wire s_axi_arready
        .s_axi_rid(axi_rid),                // output wire [0 : 0] s_axi_rid
        .s_axi_rdata(axi_rdata),            // output wire [63 : 0] s_axi_rdata
        .s_axi_rresp(axi_rresp),            // output wire [1 : 0] s_axi_rresp
        .s_axi_rlast(axi_rlast),            // output wire s_axi_rlast
        .s_axi_rvalid(axi_rvalid),          // output wire s_axi_rvalid
        .s_axi_rready(axi_rready),          // input wire s_axi_rready
        .bram_rst_a(bram_rst_a),            // output wire bram_rst_a
        .bram_clk_a(bram_clk_a),            // output wire bram_clk_a
        .bram_en_a(bram_en_a),              // output wire bram_en_a
        .bram_we_a(bram_we_a),              // output wire [7 : 0] bram_we_a
        .bram_addr_a(bram_addr_a),          // output wire [16 : 0] bram_addr_a
        .bram_wrdata_a(bram_wrdata_a),      // output wire [63 : 0] bram_wrdata_a
        .bram_rddata_a(bram_rddata_a)       // input wire [63 : 0] bram_rddata_a
    );

    // Block Memory Generator: true dual-port RAM.
    // Port A (64-bit) belongs to the AXI BRAM controller, port B (DATA_W-bit)
    // belongs to user logic.
    blk_mem_gen_0 data_RAM_inst (
        .clka(bram_clk_a),                  // input wire clka
        .rsta(bram_rst_a),                  // input wire rsta
        .ena(bram_en_a),                    // input wire ena
        .wea(bram_we_a),                    // input wire [7 : 0] wea
        // The controller issues byte addresses; dropping the 3 LSBs turns
        // them into 64-bit word addresses for port A.
        .addra(bram_addr_a[16:3]),          // input wire [13 : 0] addra
        .dina(bram_wrdata_a),               // input wire [63 : 0] dina
        .douta(bram_rddata_a),              // output wire [63 : 0] douta
        .clkb(i_clk),                       // input wire clkb
        .enb(i_ram_en),                     // input wire enb
        .web(i_ram_we),                     // input wire [31 : 0] web
        .addrb(i_ram_addr),                 // input wire [11 : 0] addrb
        .dinb(i_ram_wrdata),                // input wire [255 : 0] dinb
        .doutb(o_ram_rddata),               // output wire [255 : 0] doutb
        .rsta_busy(o_ram_rsta_busy),        // output wire rsta_busy
        .rstb_busy(o_ram_rstb_busy)         // output wire rstb_busy
    );

endmodule
Accessing the FPGA memory via Vivado
The block memory can be accessed using Tcl commands. My particular applications required the FPGA memory to be written by other logic components, and only read via Vivado. To automate this I’ve created a script that reads the whole memory and writes the contents to a CSV file. You can relatively easily update it to enable writing by using create_hw_axi_txn -type write – for more information check the Tcl commands documentation. To improve the execution time, the script reads 256 memory words per transaction.
# Read a block RAM through the JTAG-to-AXI master and dump it to a CSV file.
#
# User settings:
#   START_ADDRESS_HEX / LAST_ADDRESS_HEX - first and last address to read,
#       given in hex as 64-bit word addresses (they are shifted left by 3
#       below to obtain AXI byte addresses).
#   CSV_FILENAME - output file; its directory is created when missing.
set START_ADDRESS_HEX 0
set LAST_ADDRESS_HEX 3fff
set CSV_FILENAME "results/jtag_axi_mem.csv"
############################################################
############################################################

# Fail early, before any JTAG traffic, if the tcllib csv package is missing.
package require csv

# Number of bytes in one AXI data beat (64-bit data bus).
set ADDRESS_INCR_HEX 8

# Convert word addresses to AXI byte addresses. The start address is shifted
# the same way as the last address so both settings use the same unit.
set addr [expr {[expr 0x$START_ADDRESS_HEX] << 3}]
set last_address_dec [expr {[expr 0x$LAST_ADDRESS_HEX] << 3}]
set addr_incr_dec [expr 0x$ADDRESS_INCR_HEX]
set TRANSOBJ_NAME_PREFIX "td_txn_"

# Remove transactions left over from a previous run.
delete_hw_axi_txn [get_hw_axi_txns -quiet] -quiet

# Create one 256-beat read burst per iteration until the range is covered.
while {$addr <= $last_address_dec} {
    set addr_hex [format %x $addr]
    set objname $TRANSOBJ_NAME_PREFIX$addr_hex
    create_hw_axi_txn $objname [get_hw_axis hw_axi_1] -address $addr_hex -len 256 -type read
    set addr [expr {$addr + 256 * $addr_incr_dec}]
}
run_hw_axi [get_hw_axi_txns] -quiet

# print report (read data) as string
# split string lines into separate lists
# remove {} (empty lists)
set result_orig [lsearch -inline -all -not -exact [split [report_hw_axi_txn -t b4 [get_hw_axi_txns]] "\n"] {}]

# Start from an empty list so the header can still be inserted when the
# report contains no complete row.
set result {}
set word_cnt 0
set value ""

# convert AXI addresses to actual BRAM addresses in results
# Eight consecutive report lines are merged into one CSV row; each new line
# is prepended, so the last line of a group ends up in the leftmost position.
foreach res $result_orig {
    set orig_addr [expr 0x[lindex $res 0]]
    set act_addr [expr {$orig_addr >> 3}]
    if {$word_cnt < 7} {
        set value [lindex $res 1]$value
        incr word_cnt
    } else {
        unset -nocomplain res_elem
        # Row address: index of the 8-line group this line completes.
        lappend res_elem [format %04x [expr {[expr {$act_addr - 3}] >> 3}]]
        lappend res_elem [lindex $res 1]$value
        lappend result $res_elem
        set word_cnt 0
        set value ""
    }
}

# add header
set result [linsert $result 0 {"address" "data"}]

# write to CSV file
# Build the text first so a formatting error cannot leave the file open.
set csv_text [csv::joinlist $result]
file mkdir [file dirname $CSV_FILENAME]
set f [open $CSV_FILENAME w]
puts $f $csv_text
close $f
Example use cases
I have used this approach multiple times. One of them was during Low latency 100 Gbps communication system debugging. Another one was during my research for my PhD. Hopefully in the near future I will be able to update this post with a link to my published article.