problem with user_w_write_128_wren
Posted:
I am using revision XL for the UltraScale VCU108 board. With regard to https://gist.github.com/promach/9d185d35a6e6db0da10992a19c36f754 and https://gist.github.com/promach/a3af6c59906567c3df4179a501513a1b , do you have any idea why the user_w_write_128_wren signal for the input FIFO is asserted only once, as shown below?
- Code: Select all
//`define LOOPBACK 1
// Top level of the design.
//
// Instantiates the Xillybus IP core and connects:
//   * /dev/xillybus_mem_128                   -> a small inferred RAM (demoarray)
//   * /dev/xillybus_write_128 -> input FIFO -> KERNEL_NUM RGB2YUV kernels
//                             -> output FIFO -> /dev/xillybus_read_128
//   * an ILA core probing the pipeline signals.
//
// The 256-bit stream ports are connected to wires but no user logic drives or
// consumes them in this file.
module xillydemo(PCIE_PERST_B_LS, PCIE_REFCLK_N, PCIE_REFCLK_P, PCIE_RX_N, PCIE_RX_P, GPIO_LED, PCIE_TX_N, PCIE_TX_P);

    localparam STREAM_WIDTH = 128;

    input        PCIE_PERST_B_LS;
    input        PCIE_REFCLK_N;
    input        PCIE_REFCLK_P;
    input  [7:0] PCIE_RX_N;
    input  [7:0] PCIE_RX_P;
    output [3:0] GPIO_LED;
    output [7:0] PCIE_TX_N;
    output [7:0] PCIE_TX_P;

    // Clock and quiesce
    wire bus_clk;
    wire quiesce;

    // Memory array
    // NOTE(review): entries are 8 bits wide while the mem_128 data bus is
    // STREAM_WIDTH bits wide, so writes are truncated and reads are
    // zero-extended. The address wire below is $clog2(STREAM_WIDTH) = 7 bits
    // wide but the array only has 32 entries. Confirm both against the IP
    // core's port widths.
    reg [7:0] demoarray[0:31];

    // Wires related to /dev/xillybus_mem_128
    wire user_r_mem_128_rden;
    wire user_r_mem_128_empty;
    reg [STREAM_WIDTH-1:0] user_r_mem_128_data;
    wire user_r_mem_128_eof;
    wire user_r_mem_128_open;
    wire user_w_mem_128_wren;
    wire user_w_mem_128_full;
    wire [STREAM_WIDTH-1:0] user_w_mem_128_data;
    wire user_w_mem_128_open;
    wire [$clog2(STREAM_WIDTH)-1:0] user_mem_128_addr;
    wire user_mem_128_addr_update;

    // Wires related to /dev/xillybus_read_128
    wire user_r_read_128_rden;
    wire user_r_read_128_empty;
    wire [STREAM_WIDTH-1:0] user_r_read_128_data;
    wire user_r_read_128_eof;
    wire user_r_read_128_open;

    // Wires related to /dev/xillybus_write_128
    wire user_w_write_128_wren;
    wire user_w_write_128_full;
    wire [STREAM_WIDTH-1:0] user_w_write_128_data;
    wire user_w_write_128_open;

    // Wires related to /dev/xillybus_read_256
    wire user_r_read_256_rden;
    wire user_r_read_256_empty;
    wire [(STREAM_WIDTH << 1)-1:0] user_r_read_256_data;
    wire user_r_read_256_eof;
    wire user_r_read_256_open;

    // Wires related to /dev/xillybus_write_256
    wire user_w_write_256_wren;
    wire user_w_write_256_full;
    wire [(STREAM_WIDTH << 1)-1:0] user_w_write_256_data;
    wire user_w_write_256_open;

    xillybus xillybus_ins (
        // Ports related to /dev/xillybus_mem_128
        // FPGA to CPU signals:
        .user_r_mem_128_rden(user_r_mem_128_rden),
        .user_r_mem_128_empty(user_r_mem_128_empty),
        .user_r_mem_128_data(user_r_mem_128_data),
        .user_r_mem_128_eof(user_r_mem_128_eof),
        .user_r_mem_128_open(user_r_mem_128_open),
        // CPU to FPGA signals:
        .user_w_mem_128_wren(user_w_mem_128_wren),
        .user_w_mem_128_full(user_w_mem_128_full),
        .user_w_mem_128_data(user_w_mem_128_data),
        .user_w_mem_128_open(user_w_mem_128_open),
        // Address signals:
        .user_mem_128_addr(user_mem_128_addr),
        .user_mem_128_addr_update(user_mem_128_addr_update),

        // Ports related to /dev/xillybus_read_256
        // FPGA to CPU signals:
        .user_r_read_256_rden(user_r_read_256_rden),
        .user_r_read_256_empty(user_r_read_256_empty),
        .user_r_read_256_data(user_r_read_256_data),
        .user_r_read_256_eof(user_r_read_256_eof),
        .user_r_read_256_open(user_r_read_256_open),

        // Ports related to /dev/xillybus_write_256
        // CPU to FPGA signals:
        .user_w_write_256_wren(user_w_write_256_wren),
        .user_w_write_256_full(user_w_write_256_full),
        .user_w_write_256_data(user_w_write_256_data),
        .user_w_write_256_open(user_w_write_256_open),

        // Ports related to /dev/xillybus_read_128
        // FPGA to CPU signals:
        .user_r_read_128_rden(user_r_read_128_rden),
        .user_r_read_128_empty(user_r_read_128_empty),
        .user_r_read_128_data(user_r_read_128_data),
        .user_r_read_128_eof(user_r_read_128_eof),
        .user_r_read_128_open(user_r_read_128_open),

        // Ports related to /dev/xillybus_write_128
        // CPU to FPGA signals:
        .user_w_write_128_wren(user_w_write_128_wren),
        .user_w_write_128_full(user_w_write_128_full),
        .user_w_write_128_data(user_w_write_128_data),
        .user_w_write_128_open(user_w_write_128_open),

        // Signals to top level
        .PCIE_PERST_B_LS(PCIE_PERST_B_LS),
        .PCIE_REFCLK_N(PCIE_REFCLK_N),
        .PCIE_REFCLK_P(PCIE_REFCLK_P),
        .PCIE_RX_N(PCIE_RX_N),
        .PCIE_RX_P(PCIE_RX_P),
        .GPIO_LED(GPIO_LED),
        .PCIE_TX_N(PCIE_TX_N),
        .PCIE_TX_P(PCIE_TX_P),
        .bus_clk(bus_clk),
        .quiesce(quiesce)
    );

    // A simple inferred RAM
    always @(posedge bus_clk)
    begin
        if (user_w_mem_128_wren)
            demoarray[user_mem_128_addr] <= user_w_mem_128_data;
        if (user_r_mem_128_rden)
            user_r_mem_128_data <= demoarray[user_mem_128_addr];
    end

    // The RAM is always ready in both directions and never signals end-of-file.
    assign user_r_mem_128_empty = 0;
    assign user_r_mem_128_eof = 0;
    assign user_w_mem_128_full = 0;

    //`ifdef LOOPBACK
    // Only referenced by the commented-out loopback FIFO below.
    wire [$clog2(STREAM_WIDTH)-1:0] data_count_of_loopback_fifo;

    // 128-bit loopback
    /* fifo_128 fifo_128x128
    (
    .clk(bus_clk),
    .reset(!user_w_write_128_open && !user_r_read_128_open),
    .flush_en(0),
    .value_i(user_w_write_128_data),
    .enqueue_en(user_w_write_128_wren),
    .dequeue_en(user_r_read_128_rden),
    .value_o(user_r_read_128_data),
    .full(user_w_write_128_full),
    .empty(user_r_read_128_empty),
    .count(data_count_of_loopback_fifo)
    );
    assign user_r_read_128_eof = 0;*/
    //`else

    // Signals for ($floor((STREAM_WIDTH/PIXEL_VALUE_RANGE)/NUM_OF_COMPONENTS_IN_A_PIXEL) = 5) kernels
    // Each pixel component ([R, G, B] in, [Y, U, V] out) is an unsigned 8-bit value, so a pixel
    // occupies 3*8 = 24 bits and one 128-bit word carries at most 5 pixels (120 bits).
    // Every word written to /dev/xillybus_write_128 is converted by 5 kernels in parallel and the
    // 5 resulting pixels are returned through /dev/xillybus_read_128.
    localparam NUM_OF_COMPONENTS_IN_A_PIXEL = 3; // input: [R, G, B] , output:[Y, U, V]
    localparam PIXEL_VALUE_RANGE = 8; // number of bits occupied by each of [R, G, B] and [Y, U, V], https://docs.microsoft.com/en-us/windows-hardware/drivers/display/yuv-rgb-data-range-conversions
    localparam KERNEL_NUM = 5; // 5 copies of the kernel, each converts one pixel of a word
    localparam PIXEL_WIDTH = NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE; // bits per pixel slot (24)

    // Signals for two buffer FIFOs
    // NOTE(review): these are connected to the FIFOs' .count ports, i.e. they
    // presumably count 128-bit FIFO entries, not pixels. Their width
    // ($clog2(STREAM_WIDTH) = 7 bits) is unrelated to FIFO_DEPTH; confirm it
    // matches the fifo_128 port.
    wire [$clog2(STREAM_WIDTH)-1:0] data_count_of_input_fifo;
    wire [$clog2(STREAM_WIDTH)-1:0] data_count_of_output_fifo;

    // Kept for the ILA (probe20) only; it no longer gates ap_start (see below).
    wire is_last_few_pixels = (data_count_of_input_fifo < KERNEL_NUM) && (!user_w_write_128_wren);

    // Both FIFOs and all kernels are held in reset while neither device file is open.
    wire pipeline_reset = !user_w_write_128_open && !user_r_read_128_open;

    //-------------------------------------------kernel----------------------------------------//
    wire [STREAM_WIDTH-1:0] stream_i_V_V_dout; // Read data for 5 pixels = KERNEL_NUM*PIXEL_WIDTH = 120 bits (bits [127:120] are not used by any kernel)
    wire stream_i_V_V_empty; // Empty condition
    wire [KERNEL_NUM*NUM_OF_COMPONENTS_IN_A_PIXEL-1:0] stream_i_V_V_read; // Read enable for each color component of all five pixels, active high
    wire [STREAM_WIDTH-1:0] stream_o_V_V_din; // Write data for 5 pixels = KERNEL_NUM*PIXEL_WIDTH = 120 bits
    wire stream_o_V_V_full; // Full condition
    wire [KERNEL_NUM*NUM_OF_COMPONENTS_IN_A_PIXEL-1:0] stream_o_V_V_write; // Write enable for each color component of all five pixels, active high

    // The kernels only drive bits [KERNEL_NUM*PIXEL_WIDTH-1:0]; tie the unused
    // top bits low so the output FIFO never stores undriven bits.
    assign stream_o_V_V_din[STREAM_WIDTH-1:KERNEL_NUM*PIXEL_WIDTH] = {(STREAM_WIDTH-KERNEL_NUM*PIXEL_WIDTH){1'b0}};

    // All kernels are started together and kept started.
    // The previous expression drove ap_start with the low bits of the input
    // FIFO's word count whenever fewer than KERNEL_NUM words were queued
    // (count 1 -> 5'b00001, count 4 -> 5'b00100). That is not a per-kernel
    // enable pattern, and because the input FIFO is only dequeued when ALL read
    // strobes are high (&stream_i_V_V_read), starting a subset of the kernels
    // can never consume a word.
    // Assumes the kernels stall on stream_i*_empty_n / stream_o*_full_n while
    // started (usual for HLS stream interfaces) - TODO confirm against the
    // generated kernel.
    wire [KERNEL_NUM-1:0] ap_start = {KERNEL_NUM{1'b1}};
    wire [KERNEL_NUM-1:0] ap_done;
    wire [KERNEL_NUM-1:0] ap_idle;
    wire [KERNEL_NUM-1:0] ap_ready;

    // -----------------input FIFO ----------------------------------//
    localparam FIFO_DEPTH = 16;

    // NOTE(review): a word is dequeued only in a cycle where all 15 read
    // strobes are high simultaneously; this relies on every kernel reading its
    // three components in the same cycle - confirm.
    fifo_128
    #(
        .WIDTH(STREAM_WIDTH),
        .SIZE(FIFO_DEPTH)
    )
    input_pipe(
        .clk(bus_clk),
        .reset(pipeline_reset),
        .flush_en(1'b0),
        .value_i(user_w_write_128_data),
        .enqueue_en(user_w_write_128_wren),
        .dequeue_en(&stream_i_V_V_read),
        .value_o(stream_i_V_V_dout),
        .full(user_w_write_128_full),
        .empty(stream_i_V_V_empty),
        .count(data_count_of_input_fifo)
    );

    // Generate loop replicating KERNEL_NUM copies of the RGB2YUV HLS kernel, one per pixel slot.
    // Kernel kn uses bits [kn*PIXEL_WIDTH +: PIXEL_WIDTH] of the input and output words:
    // component 0 (R / Y) in the lowest byte of the slot, component 1 (G / U) in the middle
    // byte, component 2 (B / V) in the highest byte.
    // NOTE(review): the slots start at bit 0 and bits [127:120] are ignored; check that the
    // host places its padding byte where this layout expects it.
    generate
        genvar kn; // to indicate which kernel
        for(kn=0; kn<KERNEL_NUM; kn=kn+1) begin
            kernel RGB2YUV_kn (
                .ap_clk(bus_clk),
                .ap_rst(pipeline_reset),
                .ap_start(ap_start[kn]),
                .ap_done(ap_done[kn]),
                .ap_idle(ap_idle[kn]),
                .ap_ready(ap_ready[kn]),

                // input component R with (PIXEL_VALUE_RANGE) bits
                .stream_i0_V_V_dout(stream_i_V_V_dout[kn*PIXEL_WIDTH +: PIXEL_VALUE_RANGE]),
                .stream_i0_V_V_empty_n(!stream_i_V_V_empty),
                .stream_i0_V_V_read(stream_i_V_V_read[kn*NUM_OF_COMPONENTS_IN_A_PIXEL]),

                // input component G with (PIXEL_VALUE_RANGE) bits
                .stream_i1_V_V_dout(stream_i_V_V_dout[kn*PIXEL_WIDTH + PIXEL_VALUE_RANGE +: PIXEL_VALUE_RANGE]),
                .stream_i1_V_V_empty_n(!stream_i_V_V_empty),
                .stream_i1_V_V_read(stream_i_V_V_read[kn*NUM_OF_COMPONENTS_IN_A_PIXEL + 1]),

                // input component B with (PIXEL_VALUE_RANGE) bits
                .stream_i2_V_V_dout(stream_i_V_V_dout[kn*PIXEL_WIDTH + (NUM_OF_COMPONENTS_IN_A_PIXEL-1)*PIXEL_VALUE_RANGE +: PIXEL_VALUE_RANGE]),
                .stream_i2_V_V_empty_n(!stream_i_V_V_empty),
                .stream_i2_V_V_read(stream_i_V_V_read[kn*NUM_OF_COMPONENTS_IN_A_PIXEL + (NUM_OF_COMPONENTS_IN_A_PIXEL-1)]),

                // output component Y with (PIXEL_VALUE_RANGE) bits
                .stream_o0_V_V_din(stream_o_V_V_din[kn*PIXEL_WIDTH +: PIXEL_VALUE_RANGE]),
                .stream_o0_V_V_full_n(!stream_o_V_V_full),
                .stream_o0_V_V_write(stream_o_V_V_write[kn*NUM_OF_COMPONENTS_IN_A_PIXEL]),

                // output component U with (PIXEL_VALUE_RANGE) bits
                .stream_o1_V_V_din(stream_o_V_V_din[kn*PIXEL_WIDTH + PIXEL_VALUE_RANGE +: PIXEL_VALUE_RANGE]),
                .stream_o1_V_V_full_n(!stream_o_V_V_full),
                .stream_o1_V_V_write(stream_o_V_V_write[kn*NUM_OF_COMPONENTS_IN_A_PIXEL + 1]),

                // output component V with (PIXEL_VALUE_RANGE) bits
                .stream_o2_V_V_din(stream_o_V_V_din[kn*PIXEL_WIDTH + (NUM_OF_COMPONENTS_IN_A_PIXEL-1)*PIXEL_VALUE_RANGE +: PIXEL_VALUE_RANGE]),
                .stream_o2_V_V_full_n(!stream_o_V_V_full),
                .stream_o2_V_V_write(stream_o_V_V_write[kn*NUM_OF_COMPONENTS_IN_A_PIXEL + (NUM_OF_COMPONENTS_IN_A_PIXEL-1)])
            );
        end
    endgenerate

    //----------------------output FIFO-----------------------------//
    // A result word is enqueued only when all 15 write strobes are high in the same cycle.
    fifo_128
    #(
        .WIDTH(STREAM_WIDTH),
        .SIZE(FIFO_DEPTH)
    )
    output_pipe (
        .clk(bus_clk),
        .reset(pipeline_reset),
        .flush_en(1'b0),
        .value_i(stream_o_V_V_din),
        .enqueue_en(&stream_o_V_V_write),
        .dequeue_en(user_r_read_128_rden),
        .value_o(user_r_read_128_data),
        .full(stream_o_V_V_full),
        .empty(user_r_read_128_empty),
        .count(data_count_of_output_fifo)
    );

    assign user_r_read_128_eof = 0;

    // Vivado built-in internal logic analyzer module instantiation
    ila_0 ila(
        .clk(bus_clk),
        .probe0(user_w_write_128_data),
        .probe1(stream_i_V_V_dout),
        .probe2(stream_o_V_V_din),
        .probe3(user_r_read_128_data),
        .probe4(stream_i_V_V_read),
        .probe5(stream_o_V_V_write),
        .probe6(data_count_of_input_fifo),
        .probe7(data_count_of_output_fifo),
        .probe8(user_w_write_128_full),
        .probe9(stream_i_V_V_empty),
        .probe10(user_w_write_128_wren),
        .probe11(user_r_read_128_rden),
        .probe12(stream_o_V_V_full),
        .probe13(user_r_read_128_empty),
        .probe14(user_w_write_128_open),
        .probe15(user_r_read_128_open),
        .probe16(ap_start),
        .probe17(ap_done),
        .probe18(ap_idle),
        .probe19(ap_ready),
        .probe20(is_last_few_pixels)
    );
    //`endif
endmodule
- Code: Select all
// g++ -g -fsanitize=address host.cpp -o host `pkg-config --cflags --libs opencv`
// C++ standard library
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <fstream> // std::ifstream, std::ofstream
#include <iostream>
#include <string>
// POSIX
#include <errno.h>
#include <fcntl.h>
#include <sys/wait.h>
#include <unistd.h>
// OpenCV
#include <opencv2/core/core.hpp>
//#include <opencv2/imgcodecs/imgcodecs.hpp>
#include <opencv2/highgui/highgui.hpp>
using namespace cv;
using namespace std;
// Build-time selection of the result check performed at the end of main():
// LOOPBACK compares the received bytes with the bytes sent, RGB2YUV compares
// them with the software rgb2yuv() reference.
//#define LOOPBACK 1
#define RGB2YUV 1
// Dimensions of the loaded image in pixels; set in main() once the image has been read.
unsigned int image_width;
unsigned int image_height;
// Number of colour planes produced by split() in main(), and the index of each plane.
const unsigned int CHNL_NUM = 3;
const unsigned int RED_CHNL = 2;
const unsigned int GREEN_CHNL = 1;
const unsigned int BLUE_CHNL = 0;
// Width in bits of one word exchanged through /dev/xillybus_write_128 and /dev/xillybus_read_128.
const unsigned int STREAM_WIDTH = 128;
const unsigned int NUM_OF_BITS_PER_BYTE = 8;
const unsigned int PIXEL_VALUE_RANGE = 8; // number of bits occupied by [R, G, B] or [Y, U, V] respectively (8-bit unsigned integer for each components) , https://docs.microsoft.com/en-us/windows-hardware/drivers/display/yuv-rgb-data-range-conversions
const unsigned int NUM_OF_COMPONENTS_IN_A_PIXEL = 3; // input: [R, G, B] output:[Y, U, V]
const unsigned int PIXEL_NUM_THAT_FITS_STREAM_WIDTH = 5; // 128-bit stream can at most fits 5 pixels ((PIXEL_NUM_THAT_FITS_STREAM_WIDTH*NUM_OF_COMPONENTS_IN_A_PIXEL*PIXEL_VALUE_RANGE) bits = 120 bits), each pixels contains R, G, B which are encoded in 8 bits for each of the three color components
// One RGB pixel; each component is an unsigned 8-bit value.
struct RGB_packet{
    uint8_t R,G,B;
};

// One YUV pixel; each component is stored as an unsigned 8-bit value.
struct YUV_packet{
    uint8_t Y,U,V;
};

// Software reference for the RGB -> YUV conversion of a single pixel.
//
//   Y = 0.299*R + 0.587*G + 0.114*B
//   U = 0.492*(B - Y)
//   V = 0.877*(R - Y)
//   (https://www.pcmag.com/encyclopedia/term/55166/yuv-rgb-conversion-formulas)
//
// Y is truncated to 8 bits first and that truncated value is used for U and V.
// U and V are truncated toward zero and then reduced modulo 256, so negative
// results wrap (e.g. -25 is stored as 231).
//
// Returns a heap-allocated YUV_packet holding the result. The caller owns it
// and must release it with free(). Exits the process if allocation fails.
struct YUV_packet* rgb2yuv(struct RGB_packet rgb_input) // convert rgb to yuv
{
    const int R = rgb_input.R;
    const int G = rgb_input.G;
    const int B = rgb_input.B;

    // Exactly one packet is needed per call; the previous code allocated a
    // whole image worth of bytes for every converted pixel.
    struct YUV_packet *yuv_result = (YUV_packet *)malloc(sizeof(struct YUV_packet));
    if (!yuv_result) {
        fprintf(stderr, "Failed to allocate memory\n");
        exit(1);
    }

    const uint8_t Y = static_cast<uint8_t>(0.299*R + 0.587*G + 0.114*B);
    yuv_result->Y = Y;

    // Go through int before narrowing: converting a negative double straight
    // to uint8_t is undefined behaviour, whereas int -> uint8_t wraps modulo 256.
    yuv_result->U = static_cast<uint8_t>(static_cast<int>(0.492*(B - Y)));
    yuv_result->V = static_cast<uint8_t>(static_cast<int>(0.877*(R - Y)));

    return yuv_result;
}
int main(int argc, char *argv[]) {
int fdr, fdw, rd, wr, rd_donebytes, wr_donebytes;
uint8_t *wr_buf, *rd_buf;
pid_t pid;
struct RGB_packet *tologic;
struct YUV_packet *fromlogic;
fdr = open("/dev/xillybus_read_128", O_RDONLY); // will change to /dev/xillybus_read_128
fdw = open("/dev/xillybus_write_128", O_WRONLY); // will change to /dev/xillybus_write_128
if ((fdr < 0) || (fdw < 0)) {
perror("Failed to open Xillybus device file(s)");
exit(1);
}
// READ in an image file
String imageName( "lena512color.tiff" ); // by default
if( argc > 1)
{
imageName = argv[1];
}
Mat image;
image = imread( imageName, IMREAD_COLOR ); // Read the file
if( image.empty() ) // Check for invalid input
{
cout << "Could not open or find the image" << std::endl ;
return -1;
}
else
{
image_width = image.size().width;
image_height = image.size().height;
}
namedWindow( "Original Image", WINDOW_AUTOSIZE );
imshow( "Original Image", image );
Mat rgbchannel[CHNL_NUM];
// The actual splitting.
split(image, rgbchannel);
namedWindow("Red", WINDOW_AUTOSIZE);
imshow("Red", rgbchannel[RED_CHNL]);
namedWindow("Green", WINDOW_AUTOSIZE);
imshow("Green", rgbchannel[GREEN_CHNL]);
namedWindow("Blue", WINDOW_AUTOSIZE);
imshow("Blue", rgbchannel[BLUE_CHNL]);
waitKey(0); // see all three split channels before feeding in the channel data to xillybus/RIFFA for hardware computation
vector<RGB_packet> vTo(image_width * image_height); // lena.tiff is sized as 3*512*512
tologic = vTo.data();
if (!tologic) {
fprintf(stderr, "Failed to allocate memory\n");
exit(1);
}
for(unsigned int pixel_index = 0; pixel_index < (image_width * image_height); pixel_index++)
{
tologic[pixel_index].R = *(rgbchannel[RED_CHNL].data + pixel_index);
tologic[pixel_index].G = *(rgbchannel[GREEN_CHNL].data + pixel_index);
tologic[pixel_index].B = *(rgbchannel[BLUE_CHNL].data + pixel_index);
}
pid = fork();
if (pid < 0) {
perror("Failed to fork()");
exit(1);
}
if (pid) {
close(fdr);
wr_donebytes = 0; // this variable includes the empty 8 bits for the MSB
unsigned int num_of_pixels_sent = 0; // this is actual pixels number already sent, does not include the emtpy 8 bits
unsigned int if_index = 0;
unsigned int rgb_stream_index = 0;
uint8_t rgb_stream[STREAM_WIDTH/NUM_OF_BITS_PER_BYTE + 1]; // could accomodate 5 pixels
while (num_of_pixels_sent < image_width * image_height)
{
if(((image_width * image_height)-num_of_pixels_sent) >= PIXEL_NUM_THAT_FITS_STREAM_WIDTH)
{
// arrange the five pixels in the format as in https://i.imgur.com/mdJwk7J.png
//if_index++; printf("if_index = %d\n\r", if_index);
for(rgb_stream_index = 1; (rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1)<(STREAM_WIDTH/NUM_OF_BITS_PER_BYTE); rgb_stream_index=rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL)
{
rgb_stream[rgb_stream_index] = tologic[((rgb_stream_index-1)/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent].B;
rgb_stream[rgb_stream_index+1] = tologic[((rgb_stream_index-1)/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent].G;
rgb_stream[rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1] = tologic[((rgb_stream_index-1)/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent].R;
}
rgb_stream[STREAM_WIDTH/NUM_OF_BITS_PER_BYTE] = '\0'; // however, this NULL character is not sent across write()
rgb_stream[0] = 0; // remember that the eight most significant bits of the 128-bits stream are ignored by hardware logic
for(unsigned int j=0; j<(STREAM_WIDTH/NUM_OF_BITS_PER_BYTE); j++)
{
printf("rgb_stream[%d] = %d\n\r", j, rgb_stream[j]);
}
wr_buf = rgb_stream;
wr = write(fdw, wr_buf, STREAM_WIDTH/NUM_OF_BITS_PER_BYTE); // this write() is 128-bits or 16 bytes which include the empty MSB 8 bits
num_of_pixels_sent = num_of_pixels_sent + PIXEL_NUM_THAT_FITS_STREAM_WIDTH;
}
else // the remaining pixels do not fill all five pixel slots for a 128-bit stream
{
for(rgb_stream_index = 1; (rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1)<(((image_width * image_height)-num_of_pixels_sent)*NUM_OF_COMPONENTS_IN_A_PIXEL + 1); rgb_stream_index=rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL)
{
rgb_stream[rgb_stream_index] = tologic[((rgb_stream_index-1)/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent].B;
rgb_stream[rgb_stream_index+1] = tologic[((rgb_stream_index-1)/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent].G;
rgb_stream[rgb_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1] = tologic[((rgb_stream_index-1)/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_sent].R;
}
rgb_stream[((image_width * image_height)-num_of_pixels_sent)*NUM_OF_COMPONENTS_IN_A_PIXEL + 1] = '\0'; // however, this NULL character is not sent across write()
rgb_stream[0] = 0; // remember that the eight most significant bits of the 128-bits stream are ignored by hardware logic
/*for(unsigned int j=0; j<(((image_width * image_height)-num_of_pixels_sent)*NUM_OF_COMPONENTS_IN_A_PIXEL+1); j++)
{
printf("rgb_stream[%d] = %d\n\r", j, rgb_stream[j]);
}*/
wr_buf = rgb_stream; // this is a partially filled 128-bit stream (with less than 5 pixels)
wr = write(fdw, wr_buf, ((image_width * image_height)-num_of_pixels_sent)*NUM_OF_COMPONENTS_IN_A_PIXEL);
break; // finish sending all (image_width * image_height) pixels
}
if ((wr < 0) && (errno == EINTR))
continue;
if (wr <= 0) {
perror("write() failed");
exit(1);
}
wr_donebytes += wr;
}
sleep(1); // Let debug output drain (if used)
close(fdw);
return 0;
}
else {
close(fdw);
vector<YUV_packet> vFrom(image_width * image_height);
fromlogic = vFrom.data();
//printf("fromlogic[0].Y = %p \n", &fromlogic[0].Y);
if (!fromlogic) {
fprintf(stderr, "Failed to allocate memory\n");
exit(1);
}
rd_buf = (uint8_t *) fromlogic;
rd_donebytes = 0; // this variable includes the empty 8 bits for the MSB
unsigned int num_of_pixels_received = 0; // this is actual pixels number already received, does not include the empty 8 bits
unsigned int yuv_stream_index = 0;
uint8_t *yuv_stream;
while (num_of_pixels_received < image_width * image_height) {
if(((image_width * image_height)-num_of_pixels_received) >= PIXEL_NUM_THAT_FITS_STREAM_WIDTH)
{
yuv_stream = rd_buf + num_of_pixels_received;
//printf("before read() \n");
rd = read(fdr, yuv_stream, STREAM_WIDTH/NUM_OF_BITS_PER_BYTE);
num_of_pixels_received = num_of_pixels_received + PIXEL_NUM_THAT_FITS_STREAM_WIDTH;
//printf("num_of_pixels_received = %d\n\r", num_of_pixels_received);
// For every five pixels (128 bits) received from hardware logic computation, print out the YUV values of all five pixels
/*for(yuv_stream_index = 1; (yuv_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1)<(((image_width * image_height)-num_of_pixels_received)*NUM_OF_COMPONENTS_IN_A_PIXEL + 1); yuv_stream_index=yuv_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL)
{
printf("yuv_stream[%d] = %d\n", yuv_stream_index, fromlogic[((yuv_stream_index-1)/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_received].V );
printf("yuv_stream[%d] = %d\n", yuv_stream_index+1, fromlogic[((yuv_stream_index-1)/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_received].U );
printf("yuv_stream[%d] = %d\n", yuv_stream_index+NUM_OF_COMPONENTS_IN_A_PIXEL-1, fromlogic[((yuv_stream_index-1)/NUM_OF_COMPONENTS_IN_A_PIXEL)+num_of_pixels_received].Y );
//break; // just to test if there is actually something being read, or returned from hardware.
}*/
//break; // just to test if there is actually something being read, or returned from hardware.
}
else // the remaining pixels do not fill all five pixel slots for a 128-bit stream
{
yuv_stream = rd_buf + num_of_pixels_received;
//printf("before read in else. \n");
rd = read(fdr, yuv_stream, image_width * image_height - num_of_pixels_received); // is a partially filled 128-bit stream (with less than 5 pixels)
//printf("break in else. \n");
break; // finish receiving all (image_width * image_height) pixels
}
if ((rd < 0) && (errno == EINTR))
continue;
if (rd < 0) {
perror("read() failed");
exit(1);
}
if (rd == 0) {
fprintf(stderr, "Reached read EOF!? Should never happen.\n");
exit(0);
}
rd_donebytes += rd;
}
//printf("before for loop\n");
for (unsigned int i = 0; i < (image_width * image_height); i++) // check the perfomance of hardware with respect to software computation
{
#ifdef LOOPBACK
if( (tologic[i].R != fromlogic[i].Y) ||
(tologic[i].G != fromlogic[i].U) ||
(tologic[i].B != fromlogic[i].V) )
#elif RGB2YUV
uint8_t expected_Y = rgb2yuv(tologic[i])->Y;
uint8_t expected_U = rgb2yuv(tologic[i])->U;
uint8_t expected_V = rgb2yuv(tologic[i])->V;
if( (abs(expected_Y - fromlogic[i].Y) > 1) ||
(abs(expected_U - fromlogic[i].U) > 1) ||
(abs(expected_V - fromlogic[i].V) > 1) ) // rgb2yuv conversion hardware tolerance fails by more than 1 compared to software computation
#endif
{
printf("********************************* Attention *************************************\n\r");
printf("R:%d G:%d B:%d \n\r", tologic[i].R, tologic[i].G, tologic[i].B);
printf("Y:%d U:%d V:%d \n\r", fromlogic[i].Y, fromlogic[i].U, fromlogic[i].V);
printf("expected_Y:%d expected_U:%d expected_V:%d \n\r", expected_Y, expected_U, expected_V);
break; // just for troubleshooting
//exit(1);
}
}
sleep(1); // Let debug output drain (if used)
close(fdr);
return 0;
}
}