Bandwidth measurement exceeds theoretical bandwidth limit

Questions and discussions about the Xillybus IP core and drivers

Bandwidth measurement exceeds theoretical bandwidth limit

Postby kevin » Mon May 14, 2018 9:59 am

Hi, I am using a Virtex UltraScale VCU108 board. The following C and Verilog code is for the loopback demo bundle (XL revision, 128 bits) downloaded from the IP Core Factory.

May I know why my bandwidth measurement exceeds the theoretical bandwidth limit stated at http://xillybus.com/pcie-download ?
Note: I am getting measurements of up to 9.0 GB/s.

Code: Select all
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <pthread.h>
#include <stdint.h>
#include <sys/time.h>
#include "timer.h"

//typedef uint128_t = __uint128_t;

/* Stream word width in bits; STREAM_WIDTH / NUM_OF_BITS_PER_BYTE is the
 * number of bytes per transferred word. */
const unsigned int STREAM_WIDTH = 128;
const unsigned int NUM_OF_BITS_PER_BYTE = 8;

/* File descriptors for /dev/xillybus_write_128 and /dev/xillybus_read_128,
 * assigned by open() in main(). */
int fdw128 = 0;
int fdr128 = 0;

/* Number of 128-bit words to transfer, taken from the command line. */
unsigned int N = 0;
/* Shared loop index used by main(). */
unsigned int i;
/* Data sent to the FPGA (array_input) and data received back from it
 * (array_hardware); both are allocated in main() with N elements. */
unsigned __int128 *array_input;
unsigned __int128 *array_hardware;
/* Not used by any live code in this program. */
struct timeval tstart,tend,tv1;
/* Results of the data write (t1), the read (t2) and the zero-length
 * write (temp1) issued in main(). */
ssize_t t1,t2,temp1;

/*
 * Read exactly len bytes from fd into buf, looping over short reads and
 * retrying when the call is interrupted by a signal.
 *
 * Returns the number of bytes actually read (less than len only if read()
 * reported end-of-file), or -1 on error with errno set by read().
 */
static ssize_t read_fully(int fd, void *buf, size_t len)
{
   char *dst = buf;
   size_t done = 0;

   while (done < len) {
      ssize_t rc = read(fd, dst + done, len - done);

      if (rc < 0) {
         if (errno == EINTR)
            continue;
         return -1;
      }
      if (rc == 0)
         break;                 /* end-of-file */
      done += (size_t)rc;
   }
   return (ssize_t)done;
}

/*
 * Single-threaded loopback test: writes N 128-bit words to
 * /dev/xillybus_write_128, reads N words back from /dev/xillybus_read_128,
 * verifies them and prints the elapsed times and derived rates.
 *
 * argv[1] is the number of 128-bit words to transfer.
 * Returns EXIT_SUCCESS only if every word was transferred and matched;
 * any usage, allocation, I/O or data error yields EXIT_FAILURE.
 */
int main(int argc, char *argv[]) {
   size_t nbytes;
   double write_ms, read_ms, total_ms, mbytes;
   unsigned long count;
   char *endp;
   int rc = EXIT_FAILURE;
   GET_TIME_INIT(3);

   if (argc < 2) {
      fprintf(stderr, "Usage: %s <number of 128-bit words>\n",
              argc > 0 ? argv[0] : "test");
      return EXIT_FAILURE;
   }

   /* strtoul instead of atoi so that garbage and out-of-range input is
    * rejected rather than silently becoming 0 or a truncated value. */
   errno = 0;
   count = strtoul(argv[1], &endp, 10);
   if (errno != 0 || endp == argv[1] || *endp != '\0' ||
       count == 0 || (unsigned long)(unsigned int)count != count) {
      fprintf(stderr, "Invalid word count: %s\n", argv[1]);
      return EXIT_FAILURE;
   }
   N = (unsigned int)count;
   nbytes = (size_t)N * sizeof *array_input;

   fdw128 = open("/dev/xillybus_write_128", O_WRONLY);
   if (fdw128 < 0) {
      perror("Failed to open devfiles");
      return EXIT_FAILURE;
   }
   fdr128 = open("/dev/xillybus_read_128", O_RDONLY);
   if (fdr128 < 0) {
      perror("Failed to open devfiles");
      close(fdw128);
      return EXIT_FAILURE;
   }

   printf("sizeof(unsigned __int128) = %zu \n\r", sizeof(unsigned __int128));

   /* calloc zero-fills, so array_hardware starts out cleared. */
   array_input = calloc(N, sizeof *array_input);
   array_hardware = calloc(N, sizeof *array_hardware);
   if (array_input == NULL || array_hardware == NULL) {
      perror("Failed to allocate buffers");
      goto out;
   }

   /* generate inputs */
   for (i = 0; i < N; i++)
      array_input[i] = i;

   GET_TIME_VAL(0);

   /* A single write() is used on purpose: nothing drains the loopback
    * while this thread is writing, so a short count is reported as an
    * error instead of being retried. */
   t1 = write(fdw128, array_input, nbytes);
   if (t1 < 0) {
      perror("write");
      goto out;
   }
   if ((size_t)t1 != nbytes) {
      fprintf(stderr, "Short write: %zd of %zu bytes\n", t1, nbytes);
      goto out;
   }
   temp1 = write(fdw128, NULL, 0); /* a notification at the end of writing data */
   if (temp1 < 0) {
      perror("write (flush)");
      goto out;
   }

   GET_TIME_VAL(1);

   t2 = read_fully(fdr128, array_hardware, nbytes);

   GET_TIME_VAL(2);

   if (t2 < 0) {
      perror("read");
      goto out;
   }
   if ((size_t)t2 != nbytes) {
      fprintf(stderr, "Short read: %zd of %zu bytes\n", t2, nbytes);
      goto out;
   }

   for (i = 0; i < N; i++) {
      if (array_input[i] != array_hardware[i]) {
         /* printf has no conversion for __int128: print both 64-bit halves. */
         printf("recv[%u]: 0x%016llx%016llx , expected 0x%016llx%016llx \n\r", i,
                (unsigned long long)(array_hardware[i] >> 64),
                (unsigned long long)array_hardware[i],
                (unsigned long long)(array_input[i] >> 64),
                (unsigned long long)array_input[i]);
         goto out;
      }
   }

   write_ms = TIME_VAL_TO_MS(1) - TIME_VAL_TO_MS(0);
   read_ms  = TIME_VAL_TO_MS(2) - TIME_VAL_TO_MS(1);
   total_ms = TIME_VAL_TO_MS(2) - TIME_VAL_TO_MS(0);

   /* Convert to double before dividing so that transfers which are not a
    * whole number of MiB are not truncated. */
   mbytes = (double)nbytes / (1024.0 * 1024.0);

   printf("write time: %f us.\t send bw: %f MB/s.\n",
          write_ms * 1000.0, mbytes / (write_ms / 1000.0));

   printf("read time: %f us.\t rev bw: %f MB/s.\n",
          read_ms * 1000.0, mbytes / (read_ms / 1000.0));

   printf("round-trip time: %f us.\t total bw: %f MS/s\n",
          total_ms * 1000.0,
          (double)N / (1024.0 * 1024.0) / (total_ms / 1000.0));

   rc = EXIT_SUCCESS;

out:
   free(array_input);
   free(array_hardware);
   close(fdr128);
   close(fdw128);
   return rc;
}


Code: Select all
// Top-level demo module: instantiates the xillybus core and connects
//   - a 32-entry, 64-bit inferred RAM to the mem_64 interface,
//   - fifo_128 between the write_128 and read_128 streams (loopback),
//   - fifo_8   between the write_8   and read_8   streams (loopback).
// All user logic is clocked by bus_clk, which is driven by the core.
module xillydemo
  (
   input  PCIE_PERST_B_LS,
   input  PCIE_REFCLK_N,
   input  PCIE_REFCLK_P,
   input [7:0] PCIE_RX_N,
   input [7:0] PCIE_RX_P,
   output [3:0] GPIO_LED,
   output [7:0] PCIE_TX_N,
   output [7:0] PCIE_TX_P
   );
   // Clock and quiesce
   wire    bus_clk;
   wire    quiesce;
   
   // Memory array: 32 words of 64 bits, accessed through the mem_64 interface
   reg [63:0]    demoarray[0:31];

   
  // Wires related to /dev/xillybus_mem_64
   wire  user_r_mem_64_rden;
   wire  user_r_mem_64_empty;
   reg  [63:0] user_r_mem_64_data;
   wire  user_r_mem_64_eof;
   wire  user_r_mem_64_open;
   wire  user_w_mem_64_wren;
   wire  user_w_mem_64_full;
   wire [63:0] user_w_mem_64_data;
   wire  user_w_mem_64_open;
   wire [31:0] user_mem_64_addr;
   wire  user_mem_64_addr_update;

  // Wires related to /dev/xillybus_read_128
  wire  user_r_read_128_rden;
  wire  user_r_read_128_empty;
  wire [127:0] user_r_read_128_data;
  wire  user_r_read_128_eof;
  wire  user_r_read_128_open;

   // Wires related to /dev/xillybus_read_8
   wire        user_r_read_8_rden;
   wire        user_r_read_8_empty;
   wire [7:0]  user_r_read_8_data;
   wire        user_r_read_8_eof;
   wire        user_r_read_8_open;

  // Wires related to /dev/xillybus_write_128
  wire  user_w_write_128_wren;
  wire  user_w_write_128_full;
  wire [127:0] user_w_write_128_data;
  wire  user_w_write_128_open;

   // Wires related to /dev/xillybus_write_8
   wire        user_w_write_8_wren;
   wire        user_w_write_8_full;
   wire [7:0]  user_w_write_8_data;
   wire        user_w_write_8_open;

   xillybus xillybus_ins (

           // Ports related to /dev/xillybus_mem_64
           // FPGA to CPU signals:
           .user_r_mem_64_rden(user_r_mem_64_rden),
           .user_r_mem_64_empty(user_r_mem_64_empty),
           .user_r_mem_64_data(user_r_mem_64_data),
           .user_r_mem_64_eof(user_r_mem_64_eof),
           .user_r_mem_64_open(user_r_mem_64_open),

           // CPU to FPGA signals:
           .user_w_mem_64_wren(user_w_mem_64_wren),
           .user_w_mem_64_full(user_w_mem_64_full),
           .user_w_mem_64_data(user_w_mem_64_data),
           .user_w_mem_64_open(user_w_mem_64_open),

           // Address signals:
           .user_mem_64_addr(user_mem_64_addr),
           .user_mem_64_addr_update(user_mem_64_addr_update),


    // Ports related to /dev/xillybus_read_128
    // FPGA to CPU signals:
    .user_r_read_128_rden(user_r_read_128_rden),
    .user_r_read_128_empty(user_r_read_128_empty),
    .user_r_read_128_data(user_r_read_128_data),
    .user_r_read_128_eof(user_r_read_128_eof),
    .user_r_read_128_open(user_r_read_128_open),

    // Ports related to /dev/xillybus_write_128
    // CPU to FPGA signals:
    .user_w_write_128_wren(user_w_write_128_wren),
    .user_w_write_128_full(user_w_write_128_full),
    .user_w_write_128_data(user_w_write_128_data),
    .user_w_write_128_open(user_w_write_128_open),


           // Ports related to /dev/xillybus_read_8
           // FPGA to CPU signals:
           .user_r_read_8_rden(user_r_read_8_rden),
           .user_r_read_8_empty(user_r_read_8_empty),
           .user_r_read_8_data(user_r_read_8_data),
           .user_r_read_8_eof(user_r_read_8_eof),
           .user_r_read_8_open(user_r_read_8_open),

           // Ports related to /dev/xillybus_write_8
           // CPU to FPGA signals:
           .user_w_write_8_wren(user_w_write_8_wren),
           .user_w_write_8_full(user_w_write_8_full),
           .user_w_write_8_data(user_w_write_8_data),
           .user_w_write_8_open(user_w_write_8_open),


           // Signals to top level
           .PCIE_PERST_B_LS(PCIE_PERST_B_LS),
           .PCIE_REFCLK_N(PCIE_REFCLK_N),
           .PCIE_REFCLK_P(PCIE_REFCLK_P),
           .PCIE_RX_N(PCIE_RX_N),
           .PCIE_RX_P(PCIE_RX_P),
           .GPIO_LED(GPIO_LED),
           .PCIE_TX_N(PCIE_TX_N),
           .PCIE_TX_P(PCIE_TX_P),
           .bus_clk(bus_clk),
           .quiesce(quiesce)
           );

   // A simple inferred RAM
   // Synchronous write and registered read, both indexed by user_mem_64_addr.
   // NOTE(review): the address is 32 bits wide while demoarray has only 32
   // entries; confirm that the host never addresses beyond entry 31.
   always @(posedge bus_clk)
     begin
   if (user_w_mem_64_wren)
     demoarray[user_mem_64_addr] <= user_w_mem_64_data;
   
   if (user_r_mem_64_rden)
     user_r_mem_64_data <= demoarray[user_mem_64_addr];    
     end

   // The RAM interface is tied off as always ready: empty, eof and full
   // are held at 0.
   assign  user_r_mem_64_empty = 0;
   assign  user_r_mem_64_eof = 0;
   assign  user_w_mem_64_full = 0;

   // 32-bit loopback
   // (left commented out; no *_32 signals are declared in this module)
//   fifo_32x512 fifo_32
//     (
//      .clk(bus_clk),
//      .srst(!user_w_write_32_open && !user_r_read_32_open),
//      .din(user_w_write_32_data),
//      .wr_en(user_w_write_32_wren),
//      .rd_en(user_r_read_32_rden),
//      .dout(user_r_read_32_data),
//      .full(user_w_write_32_full),
//      .empty(user_r_read_32_empty)
//      );

//   assign  user_r_read_32_eof = 0;

   // 128-bit loopback: words from the write_128 stream go into the FIFO and
   // come out on the read_128 stream. srst is asserted while neither the
   // write nor the read side is open.
   fifo_128 fifo_128
     (
      .clk(bus_clk),
      .srst(!user_w_write_128_open && !user_r_read_128_open),
      .din(user_w_write_128_data),
      .wr_en(user_w_write_128_wren),
      .rd_en(user_r_read_128_rden),
      .dout(user_r_read_128_data),
      .full(user_w_write_128_full),
      .empty(user_r_read_128_empty)
      );

   // eof is held at 0 for the 128-bit read stream
   assign  user_r_read_128_eof = 0;
   
   // 8-bit loopback
   // Same structure as the 128-bit loopback, using the write_8/read_8 signals.
   fifo_8x2048 fifo_8
     (
      .clk(bus_clk),
      .srst(!user_w_write_8_open && !user_r_read_8_open),
      .din(user_w_write_8_data),
      .wr_en(user_w_write_8_wren),
      .rd_en(user_r_read_8_rden),
      .dout(user_r_read_8_data),
      .full(user_w_write_8_full),
      .empty(user_r_read_8_empty)
      );

   // eof is held at 0 for the 8-bit read stream
   assign  user_r_read_8_eof = 0;
   
endmodule


phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 248.046875 us. send bw: 4031.496063 MB/s.
read time: 213.867188 us. rev bw: 4675.799087 MB/s.
round-trip time: 461.914062 us. total bw: 135.306554 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 238.037109 us. send bw: 4201.025641 MB/s.
read time: 200.927734 us. rev bw: 4976.913730 MB/s.
round-trip time: 438.964844 us. total bw: 142.380423 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 250.000000 us. send bw: 4000.000000 MB/s.
read time: 334.960938 us. rev bw: 2985.422741 MB/s.
round-trip time: 584.960938 us. total bw: 106.844741 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 263.916016 us. send bw: 3789.084181 MB/s.
read time: 223.144531 us. rev bw: 4481.400438 MB/s.
round-trip time: 487.060547 us. total bw: 128.320802 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 249.023438 us. send bw: 4015.686275 MB/s.
read time: 213.867188 us. rev bw: 4675.799087 MB/s.
round-trip time: 462.890625 us. total bw: 135.021097 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 272.949219 us. send bw: 3663.685152 MB/s.
read time: 183.837891 us. rev bw: 5439.575033 MB/s.
round-trip time: 456.787109 us. total bw: 136.825227 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 249.023438 us. send bw: 4015.686275 MB/s.
read time: 218.017578 us. rev bw: 4586.786114 MB/s.
round-trip time: 467.041016 us. total bw: 133.821223 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 247.070312 us. send bw: 4047.430830 MB/s.
read time: 216.796875 us. rev bw: 4612.612613 MB/s.
round-trip time: 463.867188 us. total bw: 134.736842 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 247.070312 us. send bw: 4047.430830 MB/s.
read time: 213.867188 us. rev bw: 4675.799087 MB/s.
round-trip time: 460.937500 us. total bw: 135.593220 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 225.830078 us. send bw: 4428.108108 MB/s.
read time: 163.085938 us. rev bw: 6131.736527 MB/s.
round-trip time: 388.916016 us. total bw: 160.703076 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 236.083984 us. send bw: 4235.780765 MB/s.
read time: 191.894531 us. rev bw: 5211.195929 MB/s.
round-trip time: 427.978516 us. total bw: 146.035368 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 250.000000 us. send bw: 4000.000000 MB/s.
read time: 209.960938 us. rev bw: 4762.790698 MB/s.
round-trip time: 459.960938 us. total bw: 135.881104 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 245.849609 us. send bw: 4067.527309 MB/s.
read time: 213.134766 us. rev bw: 4691.867125 MB/s.
round-trip time: 458.984375 us. total bw: 136.170213 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 242.187500 us. send bw: 4129.032258 MB/s.
read time: 204.833984 us. rev bw: 4882.002384 MB/s.
round-trip time: 447.021484 us. total bw: 139.814309 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 257.080078 us. send bw: 3889.838557 MB/s.
read time: 213.867188 us. rev bw: 4675.799087 MB/s.
round-trip time: 470.947266 us. total bw: 132.711249 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 125.976562 us. send bw: 7937.984496 MB/s.
read time: 241.943359 us. rev bw: 4133.198789 MB/s.
round-trip time: 367.919922 us. total bw: 169.873922 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 247.802734 us. send bw: 4035.467980 MB/s.
read time: 212.158203 us. rev bw: 4713.463751 MB/s.
round-trip time: 459.960938 us. total bw: 135.881104 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 247.070312 us. send bw: 4047.430830 MB/s.
read time: 219.970703 us. rev bw: 4546.059933 MB/s.
round-trip time: 467.041016 us. total bw: 133.821223 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 255.126953 us. send bw: 3919.617225 MB/s.
read time: 215.087891 us. rev bw: 4649.262202 MB/s.
round-trip time: 470.214844 us. total bw: 132.917965 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 197.998047 us. send bw: 5050.554871 MB/s.
read time: 168.945312 us. rev bw: 5919.075145 MB/s.
round-trip time: 366.943359 us. total bw: 170.326015 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 245.849609 us. send bw: 4067.527309 MB/s.
read time: 218.017578 us. rev bw: 4586.786114 MB/s.
round-trip time: 463.867188 us. total bw: 134.736842 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 247.070312 us. send bw: 4047.430830 MB/s.
read time: 211.914062 us. rev bw: 4718.894009 MB/s.
round-trip time: 458.984375 us. total bw: 136.170213 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 253.906250 us. send bw: 3938.461538 MB/s.
read time: 220.947266 us. rev bw: 4525.966851 MB/s.
round-trip time: 474.853516 us. total bw: 131.619537 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 249.023438 us. send bw: 4015.686275 MB/s.
read time: 209.960938 us. rev bw: 4762.790698 MB/s.
round-trip time: 458.984375 us. total bw: 136.170213 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 249.023438 us. send bw: 4015.686275 MB/s.
read time: 218.994141 us. rev bw: 4566.332219 MB/s.
round-trip time: 468.017578 us. total bw: 133.541993 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 252.929688 us. send bw: 3953.667954 MB/s.
read time: 212.890625 us. rev bw: 4697.247706 MB/s.
round-trip time: 465.820312 us. total bw: 134.171908 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 253.906250 us. send bw: 3938.461538 MB/s.
read time: 213.867188 us. rev bw: 4675.799087 MB/s.
round-trip time: 467.773438 us. total bw: 133.611691 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 249.023438 us. send bw: 4015.686275 MB/s.
read time: 219.970703 us. rev bw: 4546.059933 MB/s.
round-trip time: 468.994141 us. total bw: 133.263925 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 265.136719 us. send bw: 3771.639042 MB/s.
read time: 222.900391 us. rev bw: 4486.308872 MB/s.
round-trip time: 488.037109 us. total bw: 128.064032 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 142.089844 us. send bw: 7037.800687 MB/s.
read time: 203.125000 us. rev bw: 4923.076923 MB/s.
round-trip time: 345.214844 us. total bw: 181.046676 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 250.976562 us. send bw: 3984.435798 MB/s.
read time: 244.873047 us. rev bw: 4083.748754 MB/s.
round-trip time: 495.849609 us. total bw: 126.046283 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 286.865234 us. send bw: 3485.957447 MB/s.
read time: 261.230469 us. rev bw: 3828.037383 MB/s.
round-trip time: 548.095703 us. total bw: 114.031180 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 273.193359 us. send bw: 3660.411081 MB/s.
read time: 218.994141 us. rev bw: 4566.332219 MB/s.
round-trip time: 492.187500 us. total bw: 126.984127 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 251.220703 us. send bw: 3980.563654 MB/s.
read time: 212.890625 us. rev bw: 4697.247706 MB/s.
round-trip time: 464.111328 us. total bw: 134.665965 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 209.960938 us. send bw: 4762.790698 MB/s.
read time: 187.988281 us. rev bw: 5319.480519 MB/s.
round-trip time: 397.949219 us. total bw: 157.055215 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 246.093750 us. send bw: 4063.492063 MB/s.
read time: 209.960938 us. rev bw: 4762.790698 MB/s.
round-trip time: 456.054688 us. total bw: 137.044968 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 263.916016 us. send bw: 3789.084181 MB/s.
read time: 220.947266 us. rev bw: 4525.966851 MB/s.
round-trip time: 484.863281 us. total bw: 128.902316 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 248.046875 us. send bw: 4031.496063 MB/s.
read time: 218.994141 us. rev bw: 4566.332219 MB/s.
round-trip time: 467.041016 us. total bw: 133.821223 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 248.046875 us. send bw: 4031.496063 MB/s.
read time: 215.087891 us. rev bw: 4649.262202 MB/s.
round-trip time: 463.134766 us. total bw: 134.949921 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 258.056641 us. send bw: 3875.118259 MB/s.
read time: 212.890625 us. rev bw: 4697.247706 MB/s.
round-trip time: 470.947266 us. total bw: 132.711249 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 245.849609 us. send bw: 4067.527309 MB/s.
read time: 215.087891 us. rev bw: 4649.262202 MB/s.
round-trip time: 460.937500 us. total bw: 135.593220 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 277.832031 us. send bw: 3599.297012 MB/s.
read time: 218.994141 us. rev bw: 4566.332219 MB/s.
round-trip time: 496.826172 us. total bw: 125.798526 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 265.869141 us. send bw: 3761.248852 MB/s.
read time: 251.953125 us. rev bw: 3968.992248 MB/s.
round-trip time: 517.822266 us. total bw: 120.697784 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 251.953125 us. send bw: 3968.992248 MB/s.
read time: 211.914062 us. rev bw: 4718.894009 MB/s.
round-trip time: 463.867188 us. total bw: 134.736842 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 250.976562 us. send bw: 3984.435798 MB/s.
read time: 211.914062 us. rev bw: 4718.894009 MB/s.
round-trip time: 462.890625 us. total bw: 135.021097 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 257.080078 us. send bw: 3889.838557 MB/s.
read time: 227.783203 us. rev bw: 4390.139335 MB/s.
round-trip time: 484.863281 us. total bw: 128.902316 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 230.957031 us. send bw: 4329.809725 MB/s.
read time: 197.021484 us. rev bw: 5075.588600 MB/s.
round-trip time: 427.978516 us. total bw: 146.035368 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 248.779297 us. send bw: 4019.627085 MB/s.
read time: 218.994141 us. rev bw: 4566.332219 MB/s.
round-trip time: 467.773438 us. total bw: 133.611691 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 277.099609 us. send bw: 3608.810573 MB/s.
read time: 222.900391 us. rev bw: 4486.308872 MB/s.
round-trip time: 500.000000 us. total bw: 125.000000 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 255.859375 us. send bw: 3908.396947 MB/s.
read time: 237.060547 us. rev bw: 4218.331617 MB/s.
round-trip time: 492.919922 us. total bw: 126.795443 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 253.173828 us. send bw: 3949.855352 MB/s.
read time: 210.937500 us. rev bw: 4740.740741 MB/s.
round-trip time: 464.111328 us. total bw: 134.665965 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 252.197266 us. send bw: 3965.150048 MB/s.
read time: 213.867188 us. rev bw: 4675.799087 MB/s.
round-trip time: 466.064453 us. total bw: 134.101624 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 252.929688 us. send bw: 3953.667954 MB/s.
read time: 213.134766 us. rev bw: 4691.867125 MB/s.
round-trip time: 466.064453 us. total bw: 134.101624 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 250.000000 us. send bw: 4000.000000 MB/s.
read time: 216.064453 us. rev bw: 4628.248588 MB/s.
round-trip time: 466.064453 us. total bw: 134.101624 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 196.777344 us. send bw: 5081.885856 MB/s.
read time: 161.132812 us. rev bw: 6206.060606 MB/s.
round-trip time: 357.910156 us. total bw: 174.624829 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 248.046875 us. send bw: 4031.496063 MB/s.
read time: 245.849609 us. rev bw: 4067.527309 MB/s.
round-trip time: 493.896484 us. total bw: 126.544736 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 103.027344 us. send bw: 9706.161137 MB/s.
read time: 229.980469 us. rev bw: 4348.195329 MB/s.
round-trip time: 333.007812 us. total bw: 187.683284 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 182.861328 us. send bw: 5468.624833 MB/s.
read time: 179.199219 us. rev bw: 5580.381471 MB/s.
round-trip time: 362.060547 us. total bw: 172.623061 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 244.873047 us. send bw: 4083.748754 MB/s.
read time: 219.970703 us. rev bw: 4546.059933 MB/s.
round-trip time: 464.843750 us. total bw: 134.453782 MS/s
phung@UbuntuHW15:~/Downloads$ ./test 65536
sizeof(unsigned __int128) = 16
write time: 249.023438 us. send bw: 4015.686275 MB/s.
read time: 213.867188 us. rev bw: 4675.799087 MB/s.
round-trip time: 462.890625 us. total bw: 135.021097 MS/s
phung@UbuntuHW15:~/Downloads$
kevin
 
Posts: 40
Joined: Tue Dec 12, 2017 10:41 am

Re: Bandwidth measurement exceeds theoretical bandwidth limit

Postby support » Mon May 14, 2018 10:36 am

Hello,

The reason is that you're measuring wrong. In quite a few ways, as a matter of fact.

The exceeded bandwidth measurement seems to be a result of data being buffered in the DMA buffers, so timing the write() call is meaningless. It has a negligible effect when the test runs for a few seconds, but you can't do that, because the writes and reads are coupled in the same execution thread.

All that is explained in this page, which relates to measuring bandwidth. Pay special attention to items 1, 3 and 5.

http://xillybus.com/doc/bandwidth-guidelines

Regards,
Eli
support
 
Posts: 615
Joined: Tue Apr 24, 2012 3:46 pm

Re: Bandwidth measurement exceeds theoretical bandwidth limit

Postby kevin » Tue May 15, 2018 7:20 am

Is the following C code the right way to measure bandwidth?

Code: Select all
/** Simple multi-threaded application explains how to create apps with multiple
 * threads and also explains about how to deal with args parameter of pthread_create
 */
#include <stdio.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <stdint.h>
#include <sys/time.h>
#include "timer.h"

#define VP void*
#define NTH 2

/* File descriptors for /dev/xillybus_write_128 and /dev/xillybus_read_128,
 * assigned by open() in main(). */
int fdw128 = 0;
int fdr128 = 0;

/* Number of 128-bit words to transfer, taken from the command line. */
int N = 0;
/* Shared loop index used by main(). */
int i;

/* Data sent to the FPGA (array_input) and data received back from it
 * (array_hardware); both are allocated in main() with N elements. */
unsigned __int128 *array_input;
unsigned __int128 *array_hardware;

/* Not used by any live code in this program. */
struct timeval tv1,tv2;
/* Results of the data write (w), the zero-length write (e) and the read (r)
 * performed by the two worker threads. */
ssize_t w,e,r;

int main(int argc, char *argv[])
{
        fdw128 = open("/dev/xillybus_write_128", O_WRONLY);
        fdr128 = open("/dev/xillybus_read_128", O_RDONLY);

   N = atoi(argv[1]);

        if (fdw128<0 || fdr128<0) {
                perror("Failed to open devfiles");
                exit(1);
        }

   //Initialize timer
   GET_TIME_INIT(2);

        //allocate memory
        array_input = (unsigned __int128*) malloc(N*sizeof(unsigned __int128));
        array_hardware = (unsigned __int128*) malloc(N*sizeof(unsigned __int128));

        // generate inputs and prepare outputs
        for(i=0; i<N; i++){
                array_input[i] = i;
                array_hardware[i] = 0;
        }

   VP sample_1(VP arg) {
           w = write(fdw128, array_input, sizeof(unsigned __int128)*N);
           e = write(fdw128, NULL, 0);
           pthread_exit(NULL);
   }

   VP sample_2(VP arg) {
           r = read(fdr128, array_hardware, sizeof(unsigned __int128)*N);
           pthread_exit(NULL);
   }

   pthread_t tid[NTH];
   int loop = 0;
   int value[NTH] = {1,2};
   printf("\n Going to create threads \n");
   /** Creation of threads*/

/*   for(loop=0; loop<NTH; loop++) {
      pthread_create(&tid[loop], NULL, &sample, &value[loop]);
      printf("\n value of loop = %d\n", loop);
   }
*/
   //gettimeofday(&tv1, NULL);
   pthread_create(&tid[0], NULL, &sample_1, &value[0]);
          //printf("\n value of loop = %d\n", 0);
   
   pthread_create(&tid[1], NULL, &sample_2, &value[1]);
        //printf("\n value of loop = %d\n", 1);

   /** Synch of threads in order to exit normally*/
//   gettimeofday(&tv1, NULL);
   GET_TIME_VAL(0);
   for(loop=0; loop<NTH; loop++) {
      pthread_join(tid[loop], NULL);
   }
//   gettimeofday(&tv2, NULL);
   GET_TIME_VAL(1);

        printf("round-trip time: %f us\n\r", (TIME_VAL_TO_MS(1) - TIME_VAL_TO_MS(0))*1000.0);
        printf("overall bw: %f MegaBytes/second\n\r", 2*N*(sizeof(unsigned __int128))/1024/1024/((TIME_VAL_TO_MS(1) - TIME_VAL_TO_MS(0))/1000.0));

/*      for(i=0; i<N/4; i++){
              printf("i/p is %d , o/p is %d \n\r",array_input[i],array_hardware[i]);
      }
*/   
   close(fdw128);
   close(fdr128);

   free(array_input);
   free(array_hardware);
      
   return EXIT_SUCCESS;

}


Code: Select all
// ----------------------------------------------------------------------
// Copyright (c) 2016, The Regents of the University of California All
// rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//
//     * Neither the name of The Regents of the University of California
//       nor the names of its contributors may be used to endorse or
//       promote products derived from this software without specific
//       prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL REGENTS OF THE
// UNIVERSITY OF CALIFORNIA BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
// OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
// USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
// DAMAGE.
// ----------------------------------------------------------------------
#include <sys/time.h>

/* Declares the array of `num` timestamps that the two macros below use.
 * Expands to a declaration without a trailing semicolon. */
#define GET_TIME_INIT(num) struct timeval _timers[(num)]

/* Stores the current gettimeofday() time in slot `num`; the expression
 * evaluates to gettimeofday()'s return value. */
#define GET_TIME_VAL(num) gettimeofday(&_timers[(num)], NULL)

/* Converts slot `num` to milliseconds as a double:
 * seconds * 1000 + microseconds / 1000. */
#define TIME_VAL_TO_MS(num) ((double)_timers[(num)].tv_sec * 1e3 + (double)_timers[(num)].tv_usec / 1e3)


phung@UbuntuHW15:~/Documents/fpga_overlay$ gcc -g -pthread test_xl_lpthread.c -o test_xl_lpthread
phung@UbuntuHW15:~/Documents/fpga_overlay$ ./test_xl_lpthread 65536

Going to create threads
round-trip time: 697.021484 us
overall bw: 2869.352014 MegaBytes/second
phung@UbuntuHW15:~/Documents/fpga_overlay$ ./test_xl_lpthread 65536

Going to create threads
round-trip time: 477.050781 us
overall bw: 4192.425793 MegaBytes/second
phung@UbuntuHW15:~/Documents/fpga_overlay$ ./test_xl_lpthread 65536

Going to create threads
round-trip time: 748.046875 us
overall bw: 2673.629243 MegaBytes/second
phung@UbuntuHW15:~/Documents/fpga_overlay$ ./test_xl_lpthread 65536

Going to create threads
round-trip time: 512.207031 us
overall bw: 3904.671115 MegaBytes/second
phung@UbuntuHW15:~/Documents/fpga_overlay$ ./test_xl_lpthread 65536

Going to create threads
round-trip time: 358.154297 us
overall bw: 5584.185412 MegaBytes/second
phung@UbuntuHW15:~/Documents/fpga_overlay$ ./test_xl_lpthread 65536

Going to create threads
round-trip time: 744.140625 us
overall bw: 2687.664042 MegaBytes/second
phung@UbuntuHW15:~/Documents/fpga_overlay$ ./test_xl_lpthread 65536

Going to create threads
round-trip time: 854.980469 us
overall bw: 2339.234723 MegaBytes/second
phung@UbuntuHW15:~/Documents/fpga_overlay$ ./test_xl_lpthread 65536

Going to create threads
round-trip time: 691.162109 us
overall bw: 2893.677146 MegaBytes/second
phung@UbuntuHW15:~/Documents/fpga_overlay$ ./test_xl_lpthread 65536

Going to create threads
round-trip time: 674.072266 us
overall bw: 2967.040927 MegaBytes/second
phung@UbuntuHW15:~/Documents/fpga_overlay$ ./test_xl_lpthread 65536

Going to create threads
round-trip time: 708.007812 us
overall bw: 2824.827586 MegaBytes/second
phung@UbuntuHW15:~/Documents/fpga_overlay$ ./test_xl_lpthread 65536

Going to create threads
round-trip time: 857.177734 us
overall bw: 2333.238394 MegaBytes/second
phung@UbuntuHW15:~/Documents/fpga_overlay$ ./test_xl_lpthread 65536

Going to create threads
round-trip time: 752.929688 us
overall bw: 2656.290532 MegaBytes/second
phung@UbuntuHW15:~/Documents/fpga_overlay$
kevin
 
Posts: 40
Joined: Tue Dec 12, 2017 10:41 am

Re: Bandwidth measurement exceeds theoretical bandwidth limit

Postby support » Tue May 15, 2018 9:02 am

Hello,

The web page I pointed at earlier suggests using dd to measure bandwidth, rather than writing your own code.

If you want to get acquainted with programming I/O programs with C, I suggest asking for assistance in related forums. Your current code suffers from multiple issues. This forum deals with Xillybus, not C programming techniques.

Among the things I've spotted: You don't check the return values of read() and write(), so you can't be sure how much was read or written. Also, the data segment is way too short, so the time measurement gives you less than 1 ms, and is hence polluted by all kinds of delays that an operating system can impose.

Besides, the fact that you're running the data in both directions implies that you're looping back on the FPGA, something that the said web page advises against.

The documentation describes the recommended programming practices, which are just the common way to use POSIX I/O. I suggest taking a look at that too.

Regards,
Eli
support
 
Posts: 615
Joined: Tue Apr 24, 2012 3:46 pm


Return to Xillybus

cron