by kevin »
Hi, I just tested again with my corrected xillydemo.v (I fixed the bit range of number_of_128_bit_data_passed_through_loopback_fifo), using the same test.cpp code.
However, I still cannot get all of the data back through the loopback.
https://gist.github.com/promach/a3af6c59906567c3df4179a501513a1b#file-xillydemo-v-L145-L193 https://gist.github.com/promach/9d185d35a6e6db0da10992a19c36f754#file-test-cpp Could you check whether I have made any other silly mistakes in the code?
- Code: Select all
// LOOPBACK is tested with ifdef further down: when it is defined, the 128-bit write stream is
// connected to the 128-bit read stream through a single FIFO; otherwise the kernel pipeline is built.
`define LOOPBACK 1
// Top-level module. Every port listed here is passed straight through to the xillybus instance below.
module xillydemo(PCIE_PERST_B_LS, PCIE_REFCLK_N, PCIE_REFCLK_P, PCIE_RX_N, PCIE_RX_P, GPIO_LED, PCIE_TX_N, PCIE_TX_P);
// Width, in bits, of the data buses of the *_128 streams (the *_256 buses are declared as twice this)
localparam STREAM_WIDTH = 128;
input PCIE_PERST_B_LS;
input PCIE_REFCLK_N;
input PCIE_REFCLK_P;
// 8-bit wide receive pair (N/P)
input [7:0] PCIE_RX_N;
input [7:0] PCIE_RX_P;
output [3:0] GPIO_LED;
// 8-bit wide transmit pair (N/P)
output [7:0] PCIE_TX_N;
output [7:0] PCIE_TX_P;
// Clock and quiesce, both connected to the xillybus instance
wire bus_clk;
wire quiesce;
// Memory array behind /dev/xillybus_mem_128.
// Each entry is as wide as the stream data so that a write is not truncated, and there is
// one entry for every value the address bus declared below can take, so no access falls
// outside the array.
reg [STREAM_WIDTH-1:0] demoarray[0:(1 << $clog2(STREAM_WIDTH))-1];
// Wires related to /dev/xillybus_mem_128
wire user_r_mem_128_rden;
wire user_r_mem_128_empty;
reg [STREAM_WIDTH-1:0] user_r_mem_128_data; // registered read data, loaded from demoarray
wire user_r_mem_128_eof;
wire user_r_mem_128_open;
wire user_w_mem_128_wren;
wire user_w_mem_128_full;
wire [STREAM_WIDTH-1:0] user_w_mem_128_data;
wire user_w_mem_128_open;
// NOTE(review): the address width is derived from STREAM_WIDTH here; confirm it matches the
// width of the user_mem_128_addr port of the generated xillybus core.
wire [$clog2(STREAM_WIDTH)-1:0] user_mem_128_addr;
wire user_mem_128_addr_update;
// Wires related to /dev/xillybus_read_128
wire user_r_read_128_rden;
wire user_r_read_128_empty;
wire [STREAM_WIDTH-1:0] user_r_read_128_data;
wire user_r_read_128_eof;
wire user_r_read_128_open;
// Wires related to /dev/xillybus_write_128
wire user_w_write_128_wren;
wire user_w_write_128_full;
wire [STREAM_WIDTH-1:0] user_w_write_128_data;
wire user_w_write_128_open;
// Wires related to /dev/xillybus_read_256 (twice STREAM_WIDTH wide)
wire user_r_read_256_rden;
wire user_r_read_256_empty;
wire [(STREAM_WIDTH << 1)-1:0] user_r_read_256_data;
wire user_r_read_256_eof;
wire user_r_read_256_open;
// Wires related to /dev/xillybus_write_256 (twice STREAM_WIDTH wide)
wire user_w_write_256_wren;
wire user_w_write_256_full;
wire [(STREAM_WIDTH << 1)-1:0] user_w_write_256_data;
wire user_w_write_256_open;
// Instance of the Xillybus core. All connections are by name, so their order is irrelevant;
// they are grouped here as: board-level pins, clocking, then one group per device file.
xillybus xillybus_ins (
    // ---- Pins forwarded from/to the top-level ports ----
    .PCIE_PERST_B_LS            (PCIE_PERST_B_LS),
    .PCIE_REFCLK_N              (PCIE_REFCLK_N),
    .PCIE_REFCLK_P              (PCIE_REFCLK_P),
    .PCIE_RX_N                  (PCIE_RX_N),
    .PCIE_RX_P                  (PCIE_RX_P),
    .PCIE_TX_N                  (PCIE_TX_N),
    .PCIE_TX_P                  (PCIE_TX_P),
    .GPIO_LED                   (GPIO_LED),

    // ---- Clock and quiesce ----
    .bus_clk                    (bus_clk),
    .quiesce                    (quiesce),

    // ---- /dev/xillybus_mem_128 ----
    // FPGA to CPU
    .user_r_mem_128_rden        (user_r_mem_128_rden),
    .user_r_mem_128_empty       (user_r_mem_128_empty),
    .user_r_mem_128_data        (user_r_mem_128_data),
    .user_r_mem_128_eof         (user_r_mem_128_eof),
    .user_r_mem_128_open        (user_r_mem_128_open),
    // CPU to FPGA
    .user_w_mem_128_wren        (user_w_mem_128_wren),
    .user_w_mem_128_full        (user_w_mem_128_full),
    .user_w_mem_128_data        (user_w_mem_128_data),
    .user_w_mem_128_open        (user_w_mem_128_open),
    // Address
    .user_mem_128_addr          (user_mem_128_addr),
    .user_mem_128_addr_update   (user_mem_128_addr_update),

    // ---- /dev/xillybus_read_128 (FPGA to CPU) ----
    .user_r_read_128_rden       (user_r_read_128_rden),
    .user_r_read_128_empty      (user_r_read_128_empty),
    .user_r_read_128_data       (user_r_read_128_data),
    .user_r_read_128_eof        (user_r_read_128_eof),
    .user_r_read_128_open       (user_r_read_128_open),

    // ---- /dev/xillybus_write_128 (CPU to FPGA) ----
    .user_w_write_128_wren      (user_w_write_128_wren),
    .user_w_write_128_full      (user_w_write_128_full),
    .user_w_write_128_data      (user_w_write_128_data),
    .user_w_write_128_open      (user_w_write_128_open),

    // ---- /dev/xillybus_read_256 (FPGA to CPU) ----
    .user_r_read_256_rden       (user_r_read_256_rden),
    .user_r_read_256_empty      (user_r_read_256_empty),
    .user_r_read_256_data       (user_r_read_256_data),
    .user_r_read_256_eof        (user_r_read_256_eof),
    .user_r_read_256_open       (user_r_read_256_open),

    // ---- /dev/xillybus_write_256 (CPU to FPGA) ----
    .user_w_write_256_wren      (user_w_write_256_wren),
    .user_w_write_256_full      (user_w_write_256_full),
    .user_w_write_256_data      (user_w_write_256_data),
    .user_w_write_256_open      (user_w_write_256_open)
);
// Storage behind /dev/xillybus_mem_128, written as a plain clocked array.
// The stream never reports empty, end-of-file or full: all three flags are tied low.
assign user_w_mem_128_full  = 1'b0;
assign user_r_mem_128_eof   = 1'b0;
assign user_r_mem_128_empty = 1'b0;

always @(posedge bus_clk) begin
    // Read port: capture the addressed entry into the output register.
    if (user_r_mem_128_rden)
        user_r_mem_128_data <= demoarray[user_mem_128_addr];

    // Write port: store the incoming word at the same shared address.
    if (user_w_mem_128_wren)
        demoarray[user_mem_128_addr] <= user_w_mem_128_data;
end
`ifdef LOOPBACK
// 128-bit loopback: words written to /dev/xillybus_write_128 go into this FIFO and are
// read back out through /dev/xillybus_read_128. The read stream never signals end-of-file.
wire [$clog2(STREAM_WIDTH)-1:0] data_count_of_loopback_fifo;

assign user_r_read_128_eof = 1'b0;

fifo_128 fifo_128x128 (
    .clk        (bus_clk),
    // held in reset only while neither device file is open
    .reset      (!(user_w_write_128_open || user_r_read_128_open)),
    .flush_en   (1'b0),
    // write side, driven by the CPU-to-FPGA stream
    .value_i    (user_w_write_128_data),
    .enqueue_en (user_w_write_128_wren),
    .full       (user_w_write_128_full),
    // read side, drained by the FPGA-to-CPU stream
    .value_o    (user_r_read_128_data),
    .dequeue_en (user_r_read_128_rden),
    .empty      (user_r_read_128_empty),
    // fill level, exported to the logic analyzer
    .count      (data_count_of_loopback_fifo)
);
localparam TOTAL_NUM_OF_PIXELS = 512*512; // the image is sized as 3*512*512 , width=512 and height=512
localparam NUM_OF_COMPONENTS_IN_A_PIXEL = 3; // input: [R, G, B] output:[Y, U, V]
localparam PIXEL_VALUE_RANGE = 8; // number of bits occupied by each of the [R, G, B] or [Y, U, V] components (8-bit unsigned integer)
// Number of 128-bit words needed to carry the whole image:
// 512*512 pixels * 3 components * 8 bits / 128 bits per word = 49152
localparam TOTAL_NUM_OF_128_BIT_DATA = TOTAL_NUM_OF_PIXELS*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE/STREAM_WIDTH;
// Debug counter: how many 128-bit words have been written into the loopback FIFO since the
// device files were opened. It saturates at TOTAL_NUM_OF_128_BIT_DATA, i.e. once the whole
// image has been written. The "+1" inside $clog2 guarantees the saturation value itself is
// representable (it leaves the width unchanged at 16 bits for the present parameters).
reg [$clog2(TOTAL_NUM_OF_128_BIT_DATA+1)-1:0] number_of_128_bit_data_passed_through_loopback_fifo = 0; // initially nothing is received
always @(posedge bus_clk) begin
    if (!user_w_write_128_open && !user_r_read_128_open)
        // both device files closed: start counting afresh
        number_of_128_bit_data_passed_through_loopback_fifo <= 0;
    else if (user_w_write_128_wren && (number_of_128_bit_data_passed_through_loopback_fifo < TOTAL_NUM_OF_128_BIT_DATA))
        // one write strobe delivers one 128-bit word
        number_of_128_bit_data_passed_through_loopback_fifo <= number_of_128_bit_data_passed_through_loopback_fifo + 1;
end
// Logic analyzer instance for the loopback build, clocked by bus_clk.
ila_1 ila (
    .clk    (bus_clk),
    // stream data entering and leaving the FIFO
    .probe0 (user_w_write_128_data),
    .probe1 (user_r_read_128_data),
    // FIFO fill level and handshake signals
    .probe2 (data_count_of_loopback_fifo),
    .probe3 (user_w_write_128_full),
    .probe4 (user_w_write_128_wren),
    .probe5 (user_r_read_128_rden),
    .probe6 (user_r_read_128_empty),
    // device-file open indicators
    .probe7 (user_w_write_128_open),
    .probe8 (user_r_read_128_open),
    // running count of words written
    .probe9 (number_of_128_bit_data_passed_through_loopback_fifo)
);
`else
// Parameters, wires and start logic for the kernel pipeline (built only when LOOPBACK is not defined).
// Signals for ($floor((STREAM_WIDTH/PIXEL_VALUE_RANGE)/NUM_OF_COMPONENTS_IN_A_PIXEL) = 5) kernels
// since an image pixel is unsigned 8-bit integer, its component values of [R, G, B] or [Y, U, V] range from 0 to 255.
// A pixel occupies 3*8=24 bits. Therefore, in each transaction, we could at most put 5 pixels (120 bits) into /dev/xillybus_write_128,
// compute the relevant kernel equations for 5 pixels, and send out 5 pixels again through /dev/xillybus_read_128
localparam NUM_OF_COMPONENTS_IN_A_PIXEL = 3; // input: [R, G, B] , output:[Y, U, V]
localparam PIXEL_VALUE_RANGE = 8; // number of bits occupied by each of the [R, G, B] and [Y, U, V] components (8-bit unsigned integer)
localparam KERNEL_NUM = 5; // 5 copies of kernel, each kernel computes equation for [R, G, B] of one single pixel
localparam TOTAL_NUM_OF_PIXELS = 512*512; // lena.tiff is sized as 3*512*512 , width=512 and height=512
// Signals for two buffer FIFOs
localparam FIFO_DEPTH = 16;
// The count buses are one bit wider than $clog2(FIFO_DEPTH), i.e. 5 bits for FIFO_DEPTH = 16
wire [$clog2(FIFO_DEPTH):0] data_count_of_input_fifo; // connected to the count output of the input FIFO
wire [$clog2(FIFO_DEPTH):0] data_count_of_output_fifo; // connected to the count output of the output FIFO
//-------------------------------------------kernel----------------------------------------//
wire [STREAM_WIDTH-1:0] stream_i_V_V_dout; // Read data for 5 pixels or KERNEL_NUM*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE = 120 bits (the most significant 8 bits are not used)
wire stream_i_V_V_empty; // Empty condition
wire [KERNEL_NUM*NUM_OF_COMPONENTS_IN_A_PIXEL-1:0] stream_i_V_V_read; // Read enable for each color component of all five pixels, active high
wire [STREAM_WIDTH-1:0] stream_o_V_V_din; // Write data for 5 pixels or KERNEL_NUM*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE = 120 bits (the most significant 8 bits are not used)
wire stream_o_V_V_full; // Full condition
wire [KERNEL_NUM*NUM_OF_COMPONENTS_IN_A_PIXEL-1:0] stream_o_V_V_write; // Write enable for each color component of all five pixels, active high
// High when the input FIFO count is below KERNEL_NUM and no write strobe is active this cycle.
// NOTE(review): data_count_of_input_fifo counts FIFO entries (128-bit words), not pixels; confirm that comparing it with KERNEL_NUM is what is intended.
wire is_last_few_pixels = (data_count_of_input_fifo < KERNEL_NUM) && (!user_w_write_128_wren);
reg [KERNEL_NUM-1:0] ap_start = 0; // initially the HLS kernels are not started
wire [KERNEL_NUM-1:0] ap_done;
wire [KERNEL_NUM-1:0] ap_idle;
wire [KERNEL_NUM-1:0] ap_ready;
// One start bit per kernel, registered on bus_clk.
// Normal case: all bits are set together when every read strobe is high or the input FIFO is not empty.
// NOTE(review): in the is_last_few_pixels case the FIFO count value itself is loaded as the bit mask
// (e.g. a count of 3 = 5'b00011 starts kernels 0 and 1) - verify that this matches the intended start pattern.
always @(posedge bus_clk)
ap_start <= (is_last_few_pixels) ? (data_count_of_input_fifo) : {KERNEL_NUM{(&stream_i_V_V_read || !stream_i_V_V_empty)}}; // start signals depend on whether all five pixel slots are filled or not
//----------------------- input FIFO -----------------------//
// Buffers the words written to /dev/xillybus_write_128 and presents them to the kernels.
// A word is dequeued only when every kernel read strobe is high in the same cycle.
fifo_fwft_128 #(
    .WIDTH (STREAM_WIDTH),
    .SIZE  (FIFO_DEPTH)
) input_pipe (
    .clk        (bus_clk),
    .reset      (!(user_w_write_128_open || user_r_read_128_open)),
    .flush_en   (1'b0),
    // write side
    .value_i    (user_w_write_128_data),
    .enqueue_en (user_w_write_128_wren),
    .full       (user_w_write_128_full),
    // read side
    .value_o    (stream_i_V_V_dout),
    .dequeue_en (&stream_i_V_V_read),
    .empty      (stream_i_V_V_empty),
    .count      (data_count_of_input_fifo)
);

// Debug tally of pixels handed to the input FIFO: advances by KERNEL_NUM per write strobe,
// stops advancing once another step would exceed TOTAL_NUM_OF_PIXELS, and is cleared while
// both device files are closed.
reg [$clog2(TOTAL_NUM_OF_PIXELS)-1:0] number_of_pixels_received_by_input_fifo = 0;

always @(posedge bus_clk) begin
    if (!(user_w_write_128_open || user_r_read_128_open)) begin
        number_of_pixels_received_by_input_fifo <= 0;
    end
    else if (user_w_write_128_wren) begin
        if (number_of_pixels_received_by_input_fifo <= (TOTAL_NUM_OF_PIXELS-KERNEL_NUM))
            number_of_pixels_received_by_input_fifo <= number_of_pixels_received_by_input_fifo + KERNEL_NUM;
    end
end
// KERNEL_NUM copies of the HLS kernel, one per pixel slot of a 128-bit word.
// Kernel kn owns the NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE bits starting at bit
// kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE, on both the input and the output word:
// component 0 in the lowest PIXEL_VALUE_RANGE bits, then component 1, then component 2.
// All copies share the same clock, reset, empty and full indications.
generate
    genvar kn; // index of the kernel copy
    for (kn = 0; kn < KERNEL_NUM; kn = kn + 1) begin
        kernel RGB2YUV_kn (
            .ap_clk                (bus_clk),
            .ap_rst                (!(user_w_write_128_open || user_r_read_128_open)),
            .ap_start              (ap_start[kn]),
            .ap_done               (ap_done[kn]),
            .ap_idle               (ap_idle[kn]),
            .ap_ready              (ap_ready[kn]),

            // input component 0 (R)
            .stream_i0_V_V_dout    (stream_i_V_V_dout[kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE +: PIXEL_VALUE_RANGE]),
            .stream_i0_V_V_empty_n (!stream_i_V_V_empty),
            .stream_i0_V_V_read    (stream_i_V_V_read[kn*NUM_OF_COMPONENTS_IN_A_PIXEL]),
            // input component 1 (G)
            .stream_i1_V_V_dout    (stream_i_V_V_dout[kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE + PIXEL_VALUE_RANGE +: PIXEL_VALUE_RANGE]),
            .stream_i1_V_V_empty_n (!stream_i_V_V_empty),
            .stream_i1_V_V_read    (stream_i_V_V_read[kn*NUM_OF_COMPONENTS_IN_A_PIXEL + 1]),
            // input component 2 (B)
            .stream_i2_V_V_dout    (stream_i_V_V_dout[kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE + (NUM_OF_COMPONENTS_IN_A_PIXEL-1)*PIXEL_VALUE_RANGE +: PIXEL_VALUE_RANGE]),
            .stream_i2_V_V_empty_n (!stream_i_V_V_empty),
            .stream_i2_V_V_read    (stream_i_V_V_read[kn*NUM_OF_COMPONENTS_IN_A_PIXEL + (NUM_OF_COMPONENTS_IN_A_PIXEL-1)]),

            // output component 0 (Y)
            .stream_o0_V_V_din     (stream_o_V_V_din[kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE +: PIXEL_VALUE_RANGE]),
            .stream_o0_V_V_full_n  (!stream_o_V_V_full),
            .stream_o0_V_V_write   (stream_o_V_V_write[kn*NUM_OF_COMPONENTS_IN_A_PIXEL]),
            // output component 1 (U)
            .stream_o1_V_V_din     (stream_o_V_V_din[kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE + PIXEL_VALUE_RANGE +: PIXEL_VALUE_RANGE]),
            .stream_o1_V_V_full_n  (!stream_o_V_V_full),
            .stream_o1_V_V_write   (stream_o_V_V_write[kn*NUM_OF_COMPONENTS_IN_A_PIXEL + 1]),
            // output component 2 (V)
            .stream_o2_V_V_din     (stream_o_V_V_din[kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE + (NUM_OF_COMPONENTS_IN_A_PIXEL-1)*PIXEL_VALUE_RANGE +: PIXEL_VALUE_RANGE]),
            .stream_o2_V_V_full_n  (!stream_o_V_V_full),
            .stream_o2_V_V_write   (stream_o_V_V_write[kn*NUM_OF_COMPONENTS_IN_A_PIXEL + (NUM_OF_COMPONENTS_IN_A_PIXEL-1)])
        );
    end
endgenerate

// The bits of the output word above the last pixel slot are not driven by any kernel; tie them to zero.
assign stream_o_V_V_din[STREAM_WIDTH-1 : KERNEL_NUM*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE] = 0;
//----------------------- output FIFO -----------------------//
// Collects the words produced by the kernels and feeds /dev/xillybus_read_128.
// A word is enqueued only when every kernel write strobe is high in the same cycle.
// The read stream never signals end-of-file.
assign user_r_read_128_eof = 1'b0;

fifo_128 #(
    .WIDTH (STREAM_WIDTH),
    .SIZE  (FIFO_DEPTH)
) output_pipe (
    .clk        (bus_clk),
    .reset      (!(user_w_write_128_open || user_r_read_128_open)),
    .flush_en   (1'b0),
    // write side
    .value_i    (stream_o_V_V_din),
    .enqueue_en (&stream_o_V_V_write),
    .full       (stream_o_V_V_full),
    // read side
    .value_o    (user_r_read_128_data),
    .dequeue_en (user_r_read_128_rden),
    .empty      (user_r_read_128_empty),
    .count      (data_count_of_output_fifo)
);

// Debug tally of pixels read out of the output FIFO: advances by KERNEL_NUM per read strobe,
// stops advancing once another step would exceed TOTAL_NUM_OF_PIXELS, and is cleared while
// both device files are closed.
reg [$clog2(TOTAL_NUM_OF_PIXELS)-1:0] number_of_pixels_sent_by_output_fifo = 0;

always @(posedge bus_clk) begin
    if (!(user_w_write_128_open || user_r_read_128_open)) begin
        number_of_pixels_sent_by_output_fifo <= 0;
    end
    else if (user_r_read_128_rden) begin
        if (number_of_pixels_sent_by_output_fifo <= (TOTAL_NUM_OF_PIXELS-KERNEL_NUM))
            number_of_pixels_sent_by_output_fifo <= number_of_pixels_sent_by_output_fifo + KERNEL_NUM;
    end
end
// Logic analyzer instance for the kernel-pipeline build, clocked by bus_clk.
ila_0 ila (
    .clk     (bus_clk),
    // data at the four stages: host write, kernel input, kernel output, host read
    .probe0  (user_w_write_128_data),
    .probe1  (stream_i_V_V_dout),
    .probe2  (stream_o_V_V_din),
    .probe3  (user_r_read_128_data),
    // kernel read/write strobes
    .probe4  (stream_i_V_V_read),
    .probe5  (stream_o_V_V_write),
    // FIFO fill levels
    .probe6  (data_count_of_input_fifo),
    .probe7  (data_count_of_output_fifo),
    // FIFO flags and host-side strobes
    .probe8  (user_w_write_128_full),
    .probe9  (stream_i_V_V_empty),
    .probe10 (user_w_write_128_wren),
    .probe11 (user_r_read_128_rden),
    .probe12 (stream_o_V_V_full),
    .probe13 (user_r_read_128_empty),
    // device-file open indicators
    .probe14 (user_w_write_128_open),
    .probe15 (user_r_read_128_open),
    // kernel control handshake
    .probe16 (ap_start),
    .probe17 (ap_done),
    .probe18 (ap_idle),
    .probe19 (ap_ready),
    // bookkeeping
    .probe20 (is_last_few_pixels),
    .probe21 (number_of_pixels_received_by_input_fifo),
    .probe22 (number_of_pixels_sent_by_output_fifo)
);
`endif
endmodule
- Code: Select all
// g++ -g -pedantic -Wall -Werror -Wextra -fsanitize=address test.cpp -o test
#define FORK 1
#include <unistd.h>
#include <fcntl.h>
#include <iostream>
#include <fstream>
#include <sys/wait.h>
#include <errno.h>
using namespace std;
const unsigned int image_width = 512;
const unsigned int image_height = 512;
const unsigned int NUM_OF_COMPONENTS_IN_A_PIXEL = 3; // input: [R, G, B] output:[Y, U, V]
// Write exactly `len` bytes from `buf` to `fd`, retrying after short writes and after
// writes interrupted by a signal (EINTR).
// Any other write error, or a write() that returns 0, prints a diagnostic and terminates
// the process with exit status 1. A `len` of 0 (or less) performs no write() call at all.
void allwrite(int fd, unsigned char *buf, int len) {
    for (int done = 0; done < len; ) {
        int n = write(fd, buf + done, len - done);

        if (n > 0) {
            done += n; // partial progress; loop again for the remainder
            continue;
        }
        if (n == 0) {
            fprintf(stderr, "Reached write EOF (?!)\n");
            exit(1);
        }
        // n < 0 from here on
        if (errno == EINTR)
            continue; // interrupted before anything was written: retry
        perror("allwrite() failed to write");
        exit(1);
    }
}
// Read exactly `len` bytes from `fd` into `buf`, retrying after short reads and after
// reads interrupted by a signal (EINTR).
// Any other read error, or end-of-file before `len` bytes have arrived, prints a
// diagnostic naming the read path and terminates the process with exit status 1.
// A `len` of 0 (or less) performs no read() call at all.
void allread(int fd, unsigned char *buf, int len) {
    int recvd = 0;
    int rc;
    while (recvd < len) {
        rc = read(fd, buf + recvd, len - recvd);
        if ((rc < 0) && (errno == EINTR))
            continue; // interrupted before any data was transferred: retry
        if (rc < 0) {
            perror("allread() failed to read");
            exit(1);
        }
        if (rc == 0) {
            // the stream ended before the requested amount of data was delivered
            fprintf(stderr, "Reached read EOF (?!)\n");
            exit(1);
        }
        recvd += rc;
    }
}
int main() {
#ifdef FORK
int wait_status; // for wait()
pid_t pid;
#endif
int fdr, fdw;
uint8_t *wr_buf, *rd_buf;
fdr = open("/dev/xillybus_read_128", O_RDONLY);
fdw = open("/dev/xillybus_write_128", O_WRONLY);
if ((fdr < 0) || (fdw < 0)) {
perror("Failed to open Xillybus device file(s)");
exit(1);
}
#ifdef FORK
pid = fork();
if (pid < 0) {
perror("Failed to fork()");
exit(1);
}
if (pid) {
close(fdr);
#endif
uint8_t rgb_stream[image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL + 1]; // could accomodate all (image_width*image_width) RGB pixels
rgb_stream[image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL] = '\0'; // however, this NULL character is not sent across write()
for(unsigned int rgb_index=0; rgb_index<(image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL); rgb_index++)
{
rgb_stream[rgb_index] = 1; // send all ones to fpga
}
wr_buf = rgb_stream;
allwrite(fdw, wr_buf, image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL);
allwrite(fdw, NULL, 0); // flush the write stream
printf("after allwrite() \n");
close(fdw);
#ifdef FORK
printf("*** Write process enters waiting status .....\n");
pid = wait(&wait_status);
printf("*** write process detects read process with pid %d was done ***\n", pid); // most probably write process will be done first, since FPGA computation takes a few clock cyles
return 0;
}
else {
close(fdw);
#endif
uint8_t yuv_stream[image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL + 1]; // could accomodate (image_width*image_width) YUV pixels
yuv_stream[image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL] = '\0'; // this NULL character is only to act as "stop bit" for character array
rd_buf = yuv_stream;
printf("before allread() \n");
allread(fdr, rd_buf, image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL);
printf("after allread() \n");
close(fdr);
#ifdef FORK
printf("*** Read process enters waiting status .....\n");
pid = wait(&wait_status);
printf("*** read process detects write process with pid %d was done ***\n", pid); // most probably write process will be done first, since FPGA computation takes a few clock cyles
return 0;
}
#endif
}
Hi, I just tested again with my corrected xillydemo.v (I fixed the bit range of number_of_128_bit_data_passed_through_loopback_fifo), using the same test.cpp code.
However, I still cannot get all of the data back through the loopback.
[url]https://gist.github.com/promach/a3af6c59906567c3df4179a501513a1b#file-xillydemo-v-L145-L193[/url]
[url]https://gist.github.com/promach/9d185d35a6e6db0da10992a19c36f754#file-test-cpp[/url]
Could you check whether I have made any other silly mistakes in the code?
[code]`define LOOPBACK 1
module xillydemo(PCIE_PERST_B_LS, PCIE_REFCLK_N, PCIE_REFCLK_P, PCIE_RX_N, PCIE_RX_P, GPIO_LED, PCIE_TX_N, PCIE_TX_P);
localparam STREAM_WIDTH = 128;
input PCIE_PERST_B_LS;
input PCIE_REFCLK_N;
input PCIE_REFCLK_P;
input [7:0] PCIE_RX_N;
input [7:0] PCIE_RX_P;
output [3:0] GPIO_LED;
output [7:0] PCIE_TX_N;
output [7:0] PCIE_TX_P;
// Clock and quiesce, both connected to the xillybus instance
wire bus_clk;
wire quiesce;
// Memory array behind /dev/xillybus_mem_128.
// Each entry is as wide as the stream data so that a write is not truncated, and there is
// one entry for every value the address bus declared below can take, so no access falls
// outside the array.
reg [STREAM_WIDTH-1:0] demoarray[0:(1 << $clog2(STREAM_WIDTH))-1];
// Wires related to /dev/xillybus_mem_128
wire user_r_mem_128_rden;
wire user_r_mem_128_empty;
reg [STREAM_WIDTH-1:0] user_r_mem_128_data; // registered read data, loaded from demoarray
wire user_r_mem_128_eof;
wire user_r_mem_128_open;
wire user_w_mem_128_wren;
wire user_w_mem_128_full;
wire [STREAM_WIDTH-1:0] user_w_mem_128_data;
wire user_w_mem_128_open;
// NOTE(review): the address width is derived from STREAM_WIDTH here; confirm it matches the
// width of the user_mem_128_addr port of the generated xillybus core.
wire [$clog2(STREAM_WIDTH)-1:0] user_mem_128_addr;
wire user_mem_128_addr_update;
// Wires related to /dev/xillybus_read_128
wire user_r_read_128_rden;
wire user_r_read_128_empty;
wire [STREAM_WIDTH-1:0] user_r_read_128_data;
wire user_r_read_128_eof;
wire user_r_read_128_open;
// Wires related to /dev/xillybus_write_128
wire user_w_write_128_wren;
wire user_w_write_128_full;
wire [STREAM_WIDTH-1:0] user_w_write_128_data;
wire user_w_write_128_open;
// Wires related to /dev/xillybus_read_256 (twice STREAM_WIDTH wide)
wire user_r_read_256_rden;
wire user_r_read_256_empty;
wire [(STREAM_WIDTH << 1)-1:0] user_r_read_256_data;
wire user_r_read_256_eof;
wire user_r_read_256_open;
// Wires related to /dev/xillybus_write_256 (twice STREAM_WIDTH wide)
wire user_w_write_256_wren;
wire user_w_write_256_full;
wire [(STREAM_WIDTH << 1)-1:0] user_w_write_256_data;
wire user_w_write_256_open;
xillybus xillybus_ins (
// Ports related to /dev/xillybus_mem_128
// FPGA to CPU signals:
.user_r_mem_128_rden(user_r_mem_128_rden),
.user_r_mem_128_empty(user_r_mem_128_empty),
.user_r_mem_128_data(user_r_mem_128_data),
.user_r_mem_128_eof(user_r_mem_128_eof),
.user_r_mem_128_open(user_r_mem_128_open),
// CPU to FPGA signals:
.user_w_mem_128_wren(user_w_mem_128_wren),
.user_w_mem_128_full(user_w_mem_128_full),
.user_w_mem_128_data(user_w_mem_128_data),
.user_w_mem_128_open(user_w_mem_128_open),
// Address signals:
.user_mem_128_addr(user_mem_128_addr),
.user_mem_128_addr_update(user_mem_128_addr_update),
// Ports related to /dev/xillybus_read_256
// FPGA to CPU signals:
.user_r_read_256_rden(user_r_read_256_rden),
.user_r_read_256_empty(user_r_read_256_empty),
.user_r_read_256_data(user_r_read_256_data),
.user_r_read_256_eof(user_r_read_256_eof),
.user_r_read_256_open(user_r_read_256_open),
// Ports related to /dev/xillybus_write_256
// CPU to FPGA signals:
.user_w_write_256_wren(user_w_write_256_wren),
.user_w_write_256_full(user_w_write_256_full),
.user_w_write_256_data(user_w_write_256_data),
.user_w_write_256_open(user_w_write_256_open),
// Ports related to /dev/xillybus_read_128
// FPGA to CPU signals:
.user_r_read_128_rden(user_r_read_128_rden),
.user_r_read_128_empty(user_r_read_128_empty),
.user_r_read_128_data(user_r_read_128_data),
.user_r_read_128_eof(user_r_read_128_eof),
.user_r_read_128_open(user_r_read_128_open),
// Ports related to /dev/xillybus_write_128
// CPU to FPGA signals:
.user_w_write_128_wren(user_w_write_128_wren),
.user_w_write_128_full(user_w_write_128_full),
.user_w_write_128_data(user_w_write_128_data),
.user_w_write_128_open(user_w_write_128_open),
// Signals to top level
.PCIE_PERST_B_LS(PCIE_PERST_B_LS),
.PCIE_REFCLK_N(PCIE_REFCLK_N),
.PCIE_REFCLK_P(PCIE_REFCLK_P),
.PCIE_RX_N(PCIE_RX_N),
.PCIE_RX_P(PCIE_RX_P),
.GPIO_LED(GPIO_LED),
.PCIE_TX_N(PCIE_TX_N),
.PCIE_TX_P(PCIE_TX_P),
.bus_clk(bus_clk),
.quiesce(quiesce)
);
// A simple inferred RAM
always @(posedge bus_clk)
begin
if (user_w_mem_128_wren)
demoarray[user_mem_128_addr] <= user_w_mem_128_data;
if (user_r_mem_128_rden)
user_r_mem_128_data <= demoarray[user_mem_128_addr];
end
assign user_r_mem_128_empty = 0;
assign user_r_mem_128_eof = 0;
assign user_w_mem_128_full = 0;
`ifdef LOOPBACK
wire [$clog2(STREAM_WIDTH)-1:0] data_count_of_loopback_fifo;
// 128-bit loopback
fifo_128 fifo_128x128
(
.clk(bus_clk),
.reset(!user_w_write_128_open && !user_r_read_128_open),
.flush_en(0),
.value_i(user_w_write_128_data),
.enqueue_en(user_w_write_128_wren),
.dequeue_en(user_r_read_128_rden),
.value_o(user_r_read_128_data),
.full(user_w_write_128_full),
.empty(user_r_read_128_empty),
.count(data_count_of_loopback_fifo)
);
assign user_r_read_128_eof = 0;
localparam TOTAL_NUM_OF_PIXELS = 512*512; // the image is sized as 3*512*512 , width=512 and height=512
localparam NUM_OF_COMPONENTS_IN_A_PIXEL = 3; // input: [R, G, B] output:[Y, U, V]
localparam PIXEL_VALUE_RANGE = 8; // number of bits occupied by each of the [R, G, B] or [Y, U, V] components (8-bit unsigned integer)
// Number of 128-bit words needed to carry the whole image:
// 512*512 pixels * 3 components * 8 bits / 128 bits per word = 49152
localparam TOTAL_NUM_OF_128_BIT_DATA = TOTAL_NUM_OF_PIXELS*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE/STREAM_WIDTH;
// Debug counter: how many 128-bit words have been written into the loopback FIFO since the
// device files were opened. It saturates at TOTAL_NUM_OF_128_BIT_DATA, i.e. once the whole
// image has been written. The "+1" inside $clog2 guarantees the saturation value itself is
// representable (it leaves the width unchanged at 16 bits for the present parameters).
reg [$clog2(TOTAL_NUM_OF_128_BIT_DATA+1)-1:0] number_of_128_bit_data_passed_through_loopback_fifo = 0; // initially nothing is received
always @(posedge bus_clk) begin
    if (!user_w_write_128_open && !user_r_read_128_open)
        // both device files closed: start counting afresh
        number_of_128_bit_data_passed_through_loopback_fifo <= 0;
    else if (user_w_write_128_wren && (number_of_128_bit_data_passed_through_loopback_fifo < TOTAL_NUM_OF_128_BIT_DATA))
        // one write strobe delivers one 128-bit word
        number_of_128_bit_data_passed_through_loopback_fifo <= number_of_128_bit_data_passed_through_loopback_fifo + 1;
end
// Vivado built-in internal logic analyzer module instantiation
ila_1 ila(
.clk(bus_clk),
.probe0(user_w_write_128_data),
.probe1(user_r_read_128_data),
.probe2(data_count_of_loopback_fifo),
.probe3(user_w_write_128_full),
.probe4(user_w_write_128_wren),
.probe5(user_r_read_128_rden),
.probe6(user_r_read_128_empty),
.probe7(user_w_write_128_open),
.probe8(user_r_read_128_open),
.probe9(number_of_128_bit_data_passed_through_loopback_fifo)
);
`else
// Signals for ($floor((STREAM_WIDTH/PIXEL_VALUE_RANGE)/NUM_OF_COMPONENTS_IN_A_PIXEL) = 5) kernels
// since an image pixel is unsigned 8-bit integer, its component values of [R, G, B] or [Y, U, V] range from 0 to 255.
// A pixel occupies 3*8=24 bits. Therefore, in each transaction, we could at most put 5 pixels (120 bits) into /dev/xillybus_write_128,
// computes the relevant kernel equations for 5 pixels, send out 5 pixels again through /dev/xillybus_read_128
localparam NUM_OF_COMPONENTS_IN_A_PIXEL = 3; // input: [R, G, B] , output:[Y, U, V]
localparam PIXEL_VALUE_RANGE = 8; // number of bits occupied by [R, G, B] and [Y, U, V] respectively (8-bit unsigned integer for each components) , https://docs.microsoft.com/en-us/windows-hardware/drivers/display/yuv-rgb-data-range-conversions
localparam KERNEL_NUM = 5; // 5 copies of kernel, each kernel computes equation for [R, G, B] of one single pixel
localparam TOTAL_NUM_OF_PIXELS = 512*512; // lena.tiff is sized as 3*512*512 , width=512 and height=512
// Signals for two buffer FIFOs
localparam FIFO_DEPTH = 16;
wire [$clog2(FIFO_DEPTH):0] data_count_of_input_fifo; // determines whether all five pixel slots have incoming data or not
wire [$clog2(FIFO_DEPTH):0] data_count_of_output_fifo;
//-------------------------------------------kernel----------------------------------------//
wire [STREAM_WIDTH-1:0] stream_i_V_V_dout; // Read data for 5 pixels or KERNEL_NUM*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE = 120 bits (note that we neglected the most significant 8 bits)
wire stream_i_V_V_empty; // Empty condition
wire [KERNEL_NUM*NUM_OF_COMPONENTS_IN_A_PIXEL-1:0] stream_i_V_V_read; // Read enable for each color components of all five pixels, high active
wire [STREAM_WIDTH-1:0] stream_o_V_V_din; // Write data for 5 pixels or KERNEL_NUM*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE = 120 bits (note that we neglected the most significant 8 bits)
wire stream_o_V_V_full; // Full condition
wire [KERNEL_NUM*NUM_OF_COMPONENTS_IN_A_PIXEL-1:0] stream_o_V_V_write; // Write enable for each color components of all five pixels, high active
wire is_last_few_pixels = (data_count_of_input_fifo < KERNEL_NUM) && (!user_w_write_128_wren); // the remaining pixels do not fill all five pixel slots for a 128-bit stream, AND the input FIFO is not accepting any more pixels (in contrary when it is filling in the input FIFO at the initial time) AND the input FIFO is outputting pixels
reg [KERNEL_NUM-1:0] ap_start = 0; // initially the HLS kernels are not started
wire [KERNEL_NUM-1:0] ap_done;
wire [KERNEL_NUM-1:0] ap_idle;
wire [KERNEL_NUM-1:0] ap_ready;
always @(posedge bus_clk)
ap_start <= (is_last_few_pixels) ? (data_count_of_input_fifo) : {KERNEL_NUM{(&stream_i_V_V_read || !stream_i_V_V_empty)}}; // start signals depend on whether all five pixel slots are filled or not
// -----------------input FIFO ----------------------------------//
fifo_fwft_128
#(
.WIDTH(STREAM_WIDTH),
.SIZE(FIFO_DEPTH)
)
input_pipe(
.clk(bus_clk),
.reset(!user_w_write_128_open && !user_r_read_128_open),
.flush_en(0),
.value_i(user_w_write_128_data),
.enqueue_en(user_w_write_128_wren),
.dequeue_en(&stream_i_V_V_read),
.value_o(stream_i_V_V_dout),
.full(user_w_write_128_full),
.empty(stream_i_V_V_empty),
.count(data_count_of_input_fifo)
);
// to check if xillybus has transmitted all pixels data to the input_pipe fifo
reg [$clog2(TOTAL_NUM_OF_PIXELS)-1:0] number_of_pixels_received_by_input_fifo = 0; // initially nothing is received
always@ (posedge bus_clk) begin
if(!user_w_write_128_open && !user_r_read_128_open)
number_of_pixels_received_by_input_fifo <= 0;
else if(user_w_write_128_wren && (number_of_pixels_received_by_input_fifo <= (TOTAL_NUM_OF_PIXELS-KERNEL_NUM)))
number_of_pixels_received_by_input_fifo <= number_of_pixels_received_by_input_fifo + KERNEL_NUM; // for every xillybus transaction, input fifo should receive 'KERNEL_NUM' pieces of pixels
end
// use of generate loop to replicate 5 hardware copies of RGB2YUV computational HLS kernels for 5 different pixels
generate
genvar kn; // to indicate which kernel
for(kn=0; kn<KERNEL_NUM; kn=kn+1) begin
kernel RGB2YUV_kn (
.ap_clk(bus_clk),
.ap_rst(!user_w_write_128_open && !user_r_read_128_open),
.ap_start(ap_start[kn]), // need to confirm ?
.ap_done(ap_done[kn]),
.ap_idle(ap_idle[kn]),
.ap_ready(ap_ready[kn]),
.stream_i0_V_V_dout(stream_i_V_V_dout[kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE + PIXEL_VALUE_RANGE - 1 : kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE]), // input component R with (PIXEL_VALUE_RANGE) bits
.stream_i0_V_V_empty_n(!stream_i_V_V_empty),
.stream_i0_V_V_read(stream_i_V_V_read[kn*NUM_OF_COMPONENTS_IN_A_PIXEL]),
.stream_i1_V_V_dout(stream_i_V_V_dout[kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE + (NUM_OF_COMPONENTS_IN_A_PIXEL-1)*PIXEL_VALUE_RANGE - 1 : kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE + PIXEL_VALUE_RANGE]), // input component G with (PIXEL_VALUE_RANGE) bits
.stream_i1_V_V_empty_n(!stream_i_V_V_empty),
.stream_i1_V_V_read(stream_i_V_V_read[kn*NUM_OF_COMPONENTS_IN_A_PIXEL + 1]),
.stream_i2_V_V_dout(stream_i_V_V_dout[kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE + NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE - 1 : kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE + (NUM_OF_COMPONENTS_IN_A_PIXEL-1)*PIXEL_VALUE_RANGE]), // input component B with (PIXEL_VALUE_RANGE) bits
.stream_i2_V_V_empty_n(!stream_i_V_V_empty),
.stream_i2_V_V_read(stream_i_V_V_read[kn*NUM_OF_COMPONENTS_IN_A_PIXEL + (NUM_OF_COMPONENTS_IN_A_PIXEL-1)]),
.stream_o0_V_V_din(stream_o_V_V_din[kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE + PIXEL_VALUE_RANGE - 1 : kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE]), // output component Y with (PIXEL_VALUE_RANGE) bits
.stream_o0_V_V_full_n(!stream_o_V_V_full),
.stream_o0_V_V_write(stream_o_V_V_write[kn*NUM_OF_COMPONENTS_IN_A_PIXEL]),
.stream_o1_V_V_din(stream_o_V_V_din[kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE + (NUM_OF_COMPONENTS_IN_A_PIXEL-1)*PIXEL_VALUE_RANGE - 1 : kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE + PIXEL_VALUE_RANGE]), // output component U with (PIXEL_VALUE_RANGE) bits
.stream_o1_V_V_full_n(!stream_o_V_V_full),
.stream_o1_V_V_write(stream_o_V_V_write[kn*NUM_OF_COMPONENTS_IN_A_PIXEL + 1]),
.stream_o2_V_V_din(stream_o_V_V_din[kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE + NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE - 1 : kn*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE + (NUM_OF_COMPONENTS_IN_A_PIXEL-1)*PIXEL_VALUE_RANGE]), // output component V with (PIXEL_VALUE_RANGE) bits
.stream_o2_V_V_full_n(!stream_o_V_V_full),
.stream_o2_V_V_write(stream_o_V_V_write[kn*NUM_OF_COMPONENTS_IN_A_PIXEL + (NUM_OF_COMPONENTS_IN_A_PIXEL-1)])
);
end
endgenerate
// Drive the bits of stream_o_V_V_din that lie above the last kernel's pixel slice with zero,
// so that every bit of the STREAM_WIDTH-wide word fed to the output FIFO has a driver.
assign stream_o_V_V_din[STREAM_WIDTH-1 : (KERNEL_NUM-1)*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE + NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE] = 0; // (note that we neglected the most significant 8 bits)
//----------------------output FIFO-----------------------------//
// output_pipe sits between the kernel outputs (stream_o_V_V_*) and the signals of the
// /dev/xillybus_read_128 stream (user_r_read_128_*). It is STREAM_WIDTH bits wide and
// FIFO_DEPTH entries deep, and runs entirely on bus_clk.
fifo_128
#(
.WIDTH(STREAM_WIDTH),
.SIZE(FIFO_DEPTH)
)
output_pipe (
.clk(bus_clk),
.reset(!user_w_write_128_open && !user_r_read_128_open), // asserted only while both *_open signals are low
.flush_en(0), // flush input is tied off
.value_i(stream_o_V_V_din),
.enqueue_en(&stream_o_V_V_write), // reduction AND: a word is enqueued only when every bit of stream_o_V_V_write is high
.dequeue_en(user_r_read_128_rden),
.value_o(user_r_read_128_data),
.full(stream_o_V_V_full), // inverted and fed back to the kernels as their *_full_n inputs
.empty(user_r_read_128_empty),
.count(data_count_of_output_fifo) // observed by the ILA (probe7)
);
// to check if xillybus has transmitted all pixels data from the output_pipe fifo
// The counter is cleared while both *_open signals are low. On every bus_clk cycle in which
// user_r_read_128_rden is high it advances by KERNEL_NUM, and it stops advancing once its
// value exceeds TOTAL_NUM_OF_PIXELS-KERNEL_NUM.
// NOTE(review): the register is $clog2(TOTAL_NUM_OF_PIXELS) bits wide, so if TOTAL_NUM_OF_PIXELS
// is a power of two the value TOTAL_NUM_OF_PIXELS itself does not fit - confirm the width.
reg [$clog2(TOTAL_NUM_OF_PIXELS)-1:0] number_of_pixels_sent_by_output_fifo = 0; // initially nothing is sent
always@ (posedge bus_clk) begin
if(!user_w_write_128_open && !user_r_read_128_open)
number_of_pixels_sent_by_output_fifo <= 0;
else if(user_r_read_128_rden && (number_of_pixels_sent_by_output_fifo <= (TOTAL_NUM_OF_PIXELS-KERNEL_NUM)))
number_of_pixels_sent_by_output_fifo <= number_of_pixels_sent_by_output_fifo + KERNEL_NUM; // for every xillybus transaction, output fifo should send 'KERNEL_NUM' pieces of pixels
end
// The EOF signal of the read_128 stream is tied low, i.e. it is never asserted by this logic.
assign user_r_read_128_eof = 0;
// Vivado built-in internal logic analyzer module instantiation
// All probes are sampled on bus_clk. They cover the data path (probe0-3), the kernel
// read/write strobes (probe4-5), both FIFO fill counts (probe6-7), the FIFO and stream
// handshake flags (probe8-15), the ap_* control signals (probe16-19) and the pixel
// bookkeeping signals (probe20-22).
// NOTE(review): each probe width is presumably fixed in the ila_0 IP configuration, so
// changing the width of a probed signal would require regenerating the IP - confirm.
ila_0 ila(
.clk(bus_clk),
.probe0(user_w_write_128_data),
.probe1(stream_i_V_V_dout),
.probe2(stream_o_V_V_din),
.probe3(user_r_read_128_data),
.probe4(stream_i_V_V_read),
.probe5(stream_o_V_V_write),
.probe6(data_count_of_input_fifo),
.probe7(data_count_of_output_fifo),
.probe8(user_w_write_128_full),
.probe9(stream_i_V_V_empty),
.probe10(user_w_write_128_wren),
.probe11(user_r_read_128_rden),
.probe12(stream_o_V_V_full),
.probe13(user_r_read_128_empty),
.probe14(user_w_write_128_open),
.probe15(user_r_read_128_open),
.probe16(ap_start),
.probe17(ap_done),
.probe18(ap_idle),
.probe19(ap_ready),
.probe20(is_last_few_pixels),
.probe21(number_of_pixels_received_by_input_fifo),
.probe22(number_of_pixels_sent_by_output_fifo)
);
`endif
endmodule
[/code]
[code]// g++ -g -pedantic -Wall -Werror -Wextra -fsanitize=address test.cpp -o test
#define FORK 1
#include <unistd.h>
#include <fcntl.h>
#include <iostream>
#include <fstream>
#include <sys/wait.h>
#include <errno.h>
using namespace std;
// Geometry of the test frame, in pixels.
const unsigned int image_width = 512;
const unsigned int image_height = 512;
// Number of 8-bit components stored per pixel; the host buffers hold this many bytes per pixel.
const unsigned int NUM_OF_COMPONENTS_IN_A_PIXEL = 3; // input: [R, G, B] output:[Y, U, V]
// Write exactly `len` bytes from `buf` to `fd`, retrying after short writes and EINTR.
//
// A call with len == 0 issues one zero-length write(); callers use this form as a flush
// request on a Xillybus stream. Previously such a call never reached write() at all,
// because the transfer loop does not run for len == 0.
//
// Any unrecoverable error prints a diagnostic and terminates the process with exit(1).
void allwrite(int fd, unsigned char *buf, int len) {
    int rc;

    if (len == 0) {
        // Flush request: a zero-length write legitimately returns 0, so it must not go
        // through the "rc == 0 means EOF" check below.
        do {
            rc = write(fd, buf, 0);
        } while ((rc < 0) && (errno == EINTR));
        if (rc < 0) {
            perror("allwrite() failed to flush");
            exit(1);
        }
        return;
    }

    int sent = 0;
    while (sent < len) {
        rc = write(fd, buf + sent, len - sent);
        if ((rc < 0) && (errno == EINTR))
            continue; // interrupted before anything was written: just retry
        if (rc < 0) {
            perror("allwrite() failed to write");
            exit(1);
        }
        if (rc == 0) {
            fprintf(stderr, "Reached write EOF (?!)\n");
            exit(1);
        }
        sent += rc; // partial write: continue with the remainder
    }
}
// Read exactly `len` bytes from `fd` into `buf`, retrying after short reads and EINTR.
//
// Returns only once all `len` bytes have arrived (immediately when len <= 0).
// A read error or a premature end-of-file prints a diagnostic and terminates the
// process with exit(1).
void allread(int fd, unsigned char *buf, int len) {
    int recvd = 0;
    int rc;

    while (recvd < len) {
        rc = read(fd, buf + recvd, len - recvd);
        if ((rc < 0) && (errno == EINTR))
            continue; // interrupted before anything was read: just retry
        if (rc < 0) {
            // The messages used to be copies of the allwrite() ones and blamed the wrong call.
            perror("allread() failed to read");
            exit(1);
        }
        if (rc == 0) {
            fprintf(stderr, "Reached read EOF.\n");
            exit(1);
        }
        recvd += rc; // partial read: continue with the remainder
    }
}
int main() {
#ifdef FORK
int wait_status; // for wait()
pid_t pid;
#endif
int fdr, fdw;
uint8_t *wr_buf, *rd_buf;
fdr = open("/dev/xillybus_read_128", O_RDONLY);
fdw = open("/dev/xillybus_write_128", O_WRONLY);
if ((fdr < 0) || (fdw < 0)) {
perror("Failed to open Xillybus device file(s)");
exit(1);
}
#ifdef FORK
pid = fork();
if (pid < 0) {
perror("Failed to fork()");
exit(1);
}
if (pid) {
close(fdr);
#endif
uint8_t rgb_stream[image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL + 1]; // could accomodate all (image_width*image_width) RGB pixels
rgb_stream[image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL] = '\0'; // however, this NULL character is not sent across write()
for(unsigned int rgb_index=0; rgb_index<(image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL); rgb_index++)
{
rgb_stream[rgb_index] = 1; // send all ones to fpga
}
wr_buf = rgb_stream;
allwrite(fdw, wr_buf, image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL);
allwrite(fdw, NULL, 0); // flush the write stream
printf("after allwrite() \n");
close(fdw);
#ifdef FORK
printf("*** Write process enters waiting status .....\n");
pid = wait(&wait_status);
printf("*** write process detects read process with pid %d was done ***\n", pid); // most probably write process will be done first, since FPGA computation takes a few clock cyles
return 0;
}
else {
close(fdw);
#endif
uint8_t yuv_stream[image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL + 1]; // could accomodate (image_width*image_width) YUV pixels
yuv_stream[image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL] = '\0'; // this NULL character is only to act as "stop bit" for character array
rd_buf = yuv_stream;
printf("before allread() \n");
allread(fdr, rd_buf, image_width*image_width*NUM_OF_COMPONENTS_IN_A_PIXEL);
printf("after allread() \n");
close(fdr);
#ifdef FORK
printf("*** Read process enters waiting status .....\n");
pid = wait(&wait_status);
printf("*** read process detects write process with pid %d was done ***\n", pid); // most probably write process will be done first, since FPGA computation takes a few clock cyles
return 0;
}
#endif
}[/code]