AXI4 Traffic Generator仿真研究与自定义IP(之四) - minichao9901/TangNano-20k-Zynq-7020 GitHub Wiki

AXI4 Traffic Generator仿真研究与自定义IP(之四)

1)目标

  • 自定义pwm ip
  • 可以支持寄存器配置周期,占空比
  • 可以支持写寄存器启动(产生start_pulse);在完成一段时间的任务后产生done_pulse,并置位一个可供软件读取的标志寄存器

2)pwm_ip关键代码

实现代码

`timescale 1ns / 1ps
// my_pwm: start-triggered PWM burst generator.
//
// A one-cycle pwm_start_pulse starts a burst. During a burst counter1 counts
// 0..set_freq (one PWM period) and counter2 counts completed periods. The
// burst stops when counter1 reaches set_freq while counter2 is 4'hf, and done
// is high during that last cycle. led is high while the burst is running,
// pwm_en is set and counter1 <= set_width; otherwise it is low.
module my_pwm(
    input sys_clk,
    input sys_rst_n,
    
    input pwm_en,           //reg0
    input [7:0] set_freq,   //reg1
    input [7:0] set_width,  //reg2
    input pwm_start_pulse,  //reg3
    output reg led,
    output done
    );

reg [7:0] counter1;     // position inside the current PWM period
reg [3:0] counter2;     // number of periods completed in this burst
reg       counter_en;   // high while a burst is in progress

// Terminal-count conditions shared by the processes below.
wire period_end = (counter1 == set_freq);
wire burst_end  = period_end && (counter2 == 4'hf);

// Burst enable: a start pulse has priority over the end-of-burst clear.
always @(posedge sys_clk or negedge sys_rst_n) begin
    if (sys_rst_n == 1'b0)
        counter_en <= 1'b0;
    else if (pwm_start_pulse)
        counter_en <= 1'b1;
    else if (burst_end)
        counter_en <= 1'b0;
end

// Period counter: wraps to 0 after reaching set_freq, held at 0 when idle.
always @(posedge sys_clk or negedge sys_rst_n) begin
    if (sys_rst_n == 1'b0)
        counter1 <= 8'd0;
    else if (counter_en) begin
        if (period_end)
            counter1 <= 8'd0;
        else
            counter1 <= counter1 + 8'd1;
    end
    else
        counter1 <= 8'd0;
end

// Period-count counter: advances once per finished period, held at 0 when idle.
always @(posedge sys_clk or negedge sys_rst_n) begin
    if (sys_rst_n == 1'b0)
        counter2 <= 4'd0;
    else if (counter_en) begin
        if (period_end)
            counter2 <= counter2 + 4'd1;
    end
    else
        counter2 <= 4'd0;
end

// Combinational done strobe: asserted in the final cycle of the burst.
assign done = (counter_en && burst_end) ? 1'b1 : 1'b0;

// PWM output, gated by pwm_en and by the running burst.
always @(*) begin
    if (pwm_en == 1'b0)
        led = 1'b0;
    else if (counter_en && (counter1 <= set_width))
        led = 1'b1;
    else
        led = 1'b0;
end

endmodule

AXI4_Lite封装代码

  • 接口部分
		// Users to add ports here
        output wire led,
        output wire [3:0] start_pulse,
        output wire done_pulse,
		// User ports ends
  • 用户逻辑部分。可见第1个寄存器是使能,第2个寄存器是设置周期,第3个寄存器是设置占空比,第4个寄存器是启动,第5个寄存器是done标志
  • 注意需要对slv_reg4寄存器特殊处理,因为它需要被硬件写,又要被软件读写。因此把slv_reg4的读写从总的里面剥离出来,单独改写,增加硬件写的逻辑。
        // Add user logic here
    // PWM core instance: clocked and reset by the AXI4-Lite interface signals,
    // with its control inputs taken from the slave registers.
    my_pwm u_my_pwm(
        .sys_clk(S_AXI_ACLK),
        .sys_rst_n(S_AXI_ARESETN),
        
        .pwm_en(slv_reg0[0]),                // enable: bit 0 of slv_reg0
        .set_freq(slv_reg1[7:0]),            // period setting: low byte of slv_reg1
        .set_width(slv_reg2[7:0]),           // duty setting: low byte of slv_reg2
        .pwm_start_pulse(start_pulse[3]),    // start strobe: write to register 3
        .led(led),
        .done(done_pulse)                    // end-of-burst strobe from the core
        );  
    
    // Per-register write strobes: start_pulse[n] is high while a write to
    // slave register n (n = 0..3) is being accepted (slv_reg_wren asserted and
    // the decoded write address equals n). start_pulse[3] starts the PWM core.
    // The address literals are 3 bits wide to match the 3'h4 literal used in
    // the slv_reg4 write decode.
    assign start_pulse[0] = slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 3'h0);
    assign start_pulse[1] = slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 3'h1);
    assign start_pulse[2] = slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 3'h2);
    assign start_pulse[3] = slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 3'h3);

    // done_pulse is driven by u_my_pwm. It is already fully declared as an
    // `output wire` in the module port list, so it must not be declared again
    // here: re-declaring an ANSI-style port is illegal in Verilog.
    
    // slv_reg4 (done flag register) has two writers merged into this single
    // process: hardware sets bit 0 when done_pulse is high, and software can
    // write the register through AXI4-Lite (byte enables honoured).
    // Priority: reset > hardware set > software write.
	always @( posedge S_AXI_ACLK )
	begin
	  if ( !S_AXI_ARESETN )
	    slv_reg4 <= 0;
	  else if ( done_pulse )
	    slv_reg4[0] <= 1'b1;
	  else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 3'h4) )
	    begin
	      // Software write to slave register 4: copy only the bytes whose
	      // write strobe is asserted.
	      for ( byte_index = 0; byte_index < (C_S_AXI_DATA_WIDTH/8); byte_index = byte_index+1 )
	        if ( S_AXI_WSTRB[byte_index] )
	          slv_reg4[(byte_index*8) +: 8] <= S_AXI_WDATA[(byte_index*8) +: 8];
	    end
	  // In every other case slv_reg4 keeps its value.
	end   

	// User logic ends

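补充说明:仔细看代码和波形发现,寄存器数据(slv_regN)要在slv_reg_wren/slv_reg_rden有效之后的下一个时钟沿才更新,即数据相对于slv_reg_wren/rden延迟了一拍。因此直接用slv_reg_wren的组合逻辑产生start_pulse不合适(脉冲出现时寄存器里还是旧值),应该把start_pulse打一拍后再输出。注意改写后start_pulse在always块中赋值,其端口声明需要由output wire改为output reg,并删除原来的assign语句。改写如下: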

	// Registered write strobes: start_pulse[n] is high for the clock cycle
	// after a write to slave register n (n = 0..3) is accepted, so the pulse
	// lines up with the updated register contents.
	// NOTE: start_pulse is assigned procedurally here, so its port must be
	// declared `output reg [3:0] start_pulse` (not `output wire`) and the
	// combinational `assign start_pulse[n] = ...` statements must be removed.
	always @( posedge S_AXI_ACLK )
	begin
	  if ( S_AXI_ARESETN == 1'b0 )
	    begin
	      start_pulse <= 4'b0000;
	    end
	  else
	    begin
	      start_pulse[0] <= slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 3'h0);
	      start_pulse[1] <= slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 3'h1);
	      start_pulse[2] <= slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 3'h2);
	      start_pulse[3] <= slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 3'h3);
	    end
	end

3)系统设计,用ATG驱动

image image

addr.coe

memory_initialization_radix=16;
memory_initialization_vector=
44A00000
44A00004
44A00008
44A0000c
ffffffff;

data.coe

memory_initialization_radix=16;
memory_initialization_vector=
00000001
000000ee
00000088
00000001
ffffffff;

4)仿真tb

`timescale 1ns / 1ps
// Simulation testbench: drives the block-design wrapper with a free-running
// clock and an active-low reset that is released after 10000 ns.
module tb();

    reg sys_clk;
    reg sys_rst_n;

    // Clock: starts low and toggles every 20 ns (40 ns period).
    initial begin
        sys_clk = 1'b0;
        forever #20 sys_clk = ~sys_clk;
    end

    // Reset: asserted from time 0, de-asserted at 10000 ns.
    initial begin
        sys_rst_n = 1'b0;
        #10000 sys_rst_n = 1'b1;
    end

    // Device under test.
    system_wrapper u_system_wrapper (
        .sys_clk   (sys_clk),
        .sys_rst_n (sys_rst_n)
    );

endmodule

5)仿真波形

image image

可以看到寄存器写入的过程

image

注意slv_reg_wren的波形,每次写都会产生一个脉冲。我们利用这个信号来产生start_pulse。 可以看到每个寄存器写入均符合预期。

image

最后是done信号的产生,以及slv_reg4[0]被置1。

image

整个的pwm计数过程正确

6)烧入FPGA

image image image 理论计算频率=50e6/(0xee+1)=209.205kHz, 与测量结果完全吻合。

7)用microblaze控制

image image

引脚定义pin_local.xdc

# Pin constraints for the MicroBlaze-controlled PWM design.
# The clock, reset, UART and key constraints below are commented out in this
# file (presumably supplied by another constraints file -- TODO confirm).
#create_clock -period 20.000 -name sys_clk [get_ports sys_clk]
#set_property -dict {PACKAGE_PIN N18 IOSTANDARD LVCMOS33} [get_ports sys_clk]
#set_property -dict {PACKAGE_PIN P14 IOSTANDARD LVCMOS33} [get_ports sys_rst_n]

#set_property -dict {PACKAGE_PIN H15 IOSTANDARD LVCMOS33} [get_ports uart_rx]
#set_property -dict {PACKAGE_PIN G15 IOSTANDARD LVCMOS33} [get_ports uart_tx]

#set_property -dict {PACKAGE_PIN M19 IOSTANDARD LVCMOS33} [get_ports {key_tri_io[0]}]
#set_property -dict {PACKAGE_PIN M20 IOSTANDARD LVCMOS33} [get_ports {key_tri_io[1]}]

# PWM IP outputs: PWM waveform (led) and the done strobe.
set_property -dict {PACKAGE_PIN B20 IOSTANDARD LVCMOS33} [get_ports led]
set_property -dict {PACKAGE_PIN D20 IOSTANDARD LVCMOS33} [get_ports done_pulse]

# Per-register write strobes start_pulse[3:0] brought out to pins.
set_property -dict {PACKAGE_PIN H18 IOSTANDARD LVCMOS33} [get_ports {start_pulse[0]}]
set_property -dict {PACKAGE_PIN F20 IOSTANDARD LVCMOS33} [get_ports {start_pulse[1]}]
set_property -dict {PACKAGE_PIN G20 IOSTANDARD LVCMOS33} [get_ports {start_pulse[2]}]
set_property -dict {PACKAGE_PIN H20 IOSTANDARD LVCMOS33} [get_ports {start_pulse[3]}]

8)程序设计

#include <stdio.h>
#include "platform.h"
#include "xil_printf.h"
#include "xparameters.h"
#include "sleep.h"

#define PMW_IP_BASEADDR XPAR_MY_PWM_0_S00_AXI_BASEADDR

/*
 * Register read/write self-test.
 *
 * Writes two sets of patterns to the registers at offsets 0x0, 0x4, 0x8 and
 * 0x10, printing the value read back from each register after every set.
 * Offset 0xc is not touched. Never returns: it spins forever once the two
 * passes are done.
 */
void test_wr()
{
    static const unsigned int offsets[4] = { 0x0, 0x4, 0x8, 0x10 };
    static const unsigned int patterns[2][4] = {
        { 0x1, 0xAA, 0x55, 0x1 },
        { 0x0, 0x22, 0x33, 0x0 },
    };
    int pass;
    int i;

    for (pass = 0; pass < 2; pass++) {
        /* Write the whole pattern set first ... */
        for (i = 0; i < 4; i++) {
            Xil_Out32(PMW_IP_BASEADDR + offsets[i], patterns[pass][i]);
        }
        /* ... then read every register back and print it in hex. */
        for (i = 0; i < 4; i++) {
            xil_printf("%x\r\n", Xil_In32(PMW_IP_BASEADDR + offsets[i]));
        }
    }

    for (;;) {
        /* halt here */
    }
}

/*
 * Entry point: configure the PWM IP, start the first burst, then restart a
 * new burst every time the hardware reports completion.
 *
 * Register offsets used (from PMW_IP_BASEADDR):
 *   0x00 enable, 0x04 period, 0x08 duty, 0x0c start strobe (any write),
 *   0x10 done flag (bit 0 set by hardware, cleared by software).
 *
 * The polling loop never exits, so the cleanup code after it is not reached.
 */
int main()
{
    init_platform();

    print("Hello World\n\r");
    //test_wr();

    Xil_Out32(PMW_IP_BASEADDR, 0x1);        //pwm_en
    Xil_Out32(PMW_IP_BASEADDR+0x4, 0xAA);   //pwm_period
    Xil_Out32(PMW_IP_BASEADDR+0x8, 0x55);   //pwm_duty
    Xil_Out32(PMW_IP_BASEADDR+0xc, 0x1);    //pwm_start_pulse

    while(1){
        /* Hardware only sets bit 0 of the done register, so test that bit
         * instead of comparing a truncated byte against 1. */
        if (Xil_In32(PMW_IP_BASEADDR+0x10) & 0x1) {
            //usleep(10*1000);

            /* Clear the done flag BEFORE restarting. If the burst were
             * restarted first, a short burst could finish before the clear
             * write lands; the clear would then erase the new done flag and
             * this loop would wait forever. */
            Xil_Out32(PMW_IP_BASEADDR+0x10, 0x0);  //pwm_done_flag clear
            Xil_Out32(PMW_IP_BASEADDR+0xc, 0x1);   //pwm_start_pulse
        }
    }

    cleanup_platform();
    return 0;
}

9)运行结果

image image

如果usleep(10*1000)启用,效果符合预期。

image

如果把usleep(10*1000)注释掉,效果符合预期。

10)感悟

最初的时候,我们在pwm_ip中用了2个always块对slv_reg4进行写操作:软件写是一个always(保留模板的原始代码),硬件写是另一个always。测试结果表明这样不能正常工作,只有硬件写起作用,软件写不起作用——同一个reg在两个always块中被赋值会形成多驱动,结果不可预期。后来把slv_reg4从原始的写逻辑中剥离出来,将软件写和硬件写合并到同一个always中(硬件置位优先)之后,就可以了。

⚠️ **GitHub.com Fallback** ⚠️