I'm not sure if this is a good place to ask, but I have no idea where else I can get help.
Currently I'm running Xillybus on Virtex5 (ML507 proto board).
In my design, I need to continuously send ~120 MB/s of data from the PC to the FPGA, and ~1 MB/s from the FPGA to the PC.
I've generated a core with the IP Core Factory (specification attached below), and wrote simple C code to write and capture data and measure the time.
My core has a device with 150 MB/s of bandwidth to the FPGA, but I can only get ~75 MB/s.
The situation is the same with my own devices (xillybus_md_upload, xillybus_md_upload_my, xillybus_md_download, xillybus_md_download_my) and with the default Xillybus devices (xillybus_write_32 and xillybus_read_32).
I'm using Ubuntu 12.04 32-bit on an Athlon 64 X2 4800+ CPU.
In the code below I'm trying to receive the same data as was sent, but I've also tried sending a lot of data to the FPGA and receiving only 1 byte per 160K bytes sent.
Any help will be appreciated.
Core specification:
- Code: Select all
------- /dev/xillybus_read_32
Upstream (FPGA to host):
Data width: 32 bits
DMA buffers: 32 x 128 kB = 4 MB
Flow control: Asynchronous, select() and non-blocking read() supported
Seekable: No
------- /dev/xillybus_write_32
Downstream (host to FPGA):
Data width: 32 bits
DMA buffers: 32 x 128 kB = 4 MB
Flow control: Asynchronous
Seekable: No
FPGA RAM for DMA acceleration: 4 segments x 512 bytes = 2 kB
------- /dev/xillybus_read_8
Upstream (FPGA to host):
Data width: 8 bits
DMA buffers: 4 x 4 kB = 16 kB
Flow control: Asynchronous, select() and non-blocking read() supported
Seekable: No
------- /dev/xillybus_write_8
Downstream (host to FPGA):
Data width: 8 bits
DMA buffers: 4 x 4 kB = 16 kB
Flow control: Asynchronous
Seekable: No
FPGA RAM for DMA acceleration: None
------- /dev/xillybus_mem_8
Upstream (FPGA to host):
Data width: 8 bits
DMA buffers: 4 x 4 kB = 16 kB
Flow control: Synchronous
Seekable: Yes, with 5 address bits
Downstream (host to FPGA):
Data width: 8 bits
DMA buffers: 4 x 4 kB = 16 kB
Flow control: Synchronous
Seekable: Yes, with 5 address bits
FPGA RAM for DMA acceleration: None
------- /dev/xillybus_md_download
Downstream (host to FPGA):
Data width: 32 bits
DMA buffers: 32 x 128 kB = 4 MB
Flow control: Asynchronous
Seekable: No
FPGA RAM for DMA acceleration: 4 segments x 512 bytes = 2 kB
------- /dev/xillybus_md_download_my
Downstream (host to FPGA):
Data width: 32 bits
DMA buffers: 64 x 128 kB = 8 MB
Flow control: Asynchronous
Seekable: No
FPGA RAM for DMA acceleration: 4 segments x 512 bytes = 2 kB
------- /dev/xillybus_md_upload
Upstream (FPGA to host):
Data width: 8 bits
DMA buffers: 4 x 4 kB = 16 kB
Flow control: Synchronous
Seekable: No
------- /dev/xillybus_md_upload_my
Upstream (FPGA to host):
Data width: 8 bits
DMA buffers: 4 x 128 bytes = 512 bytes
Flow control: Asynchronous, select() and non-blocking read() supported
Seekable: No
C program code:
- Code: Select all
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
/*
 * Build-time configuration of the transfer benchmark.
 *
 * Every macro that expands to an expression is wrapped in parentheses so it
 * can be used safely inside larger expressions (e.g. x / MEGABYTE or
 * x % words_number) without operator-precedence surprises.
 */
#define MEGABYTE (1024*1024)
#define SQUARE_100 40000
//#define DEBUG_MODE
//#define WORD_CHECK
//to calculate size of data sent, multiply words_number and fpga_word_width
//
//words number should:
// -be bigger or equal fpga_response_factor
//- can be divided by fpga_response_factor
#define words_number (MEGABYTE*10*10)
#define fpga_word_width 4
#define fpga_response_factor 0.25 //must match defined in fpga
/*
 * NOTE(review): the preprocessor only evaluates integer expressions, so the
 * sanity check below can only be enabled (WORD_CHECK) while
 * fpga_response_factor is an integer literal; with the current 0.25 it would
 * not compile.
 */
#ifdef WORD_CHECK
#if words_number < fpga_response_factor
#error Not enough words_number or fpga_response_factor too big
#elif (words_number%fpga_response_factor)!= 0
#error words_number not dividable by fpga_response_factor
#endif
#endif
/* Descriptors of the two files named on the command line; main() opens them. */
int writeFd, readFd;

/*
 * Status flags. The worker threads set rcv_started / snd_started to 1 when
 * they begin; start_sending is not assigned anywhere in the code shown here.
 */
int rcv_started = 0, snd_started = 0, start_sending = 0;

/* Payload buffer of memsize bytes, allocated by main(), written by sndThread(). */
char *mem;

/* Number of bytes to send (in fact - sendsize). */
const long memsize = (words_number) * fpga_word_width;
/* Number of bytes the receive thread waits for. */
const long rcvsize = words_number / fpga_response_factor;

/* Wall-clock timestamps taken around the write and the read phase. */
struct timeval start_write, end_write;
struct timeval start_read, end_read;

/* Elapsed time of each phase in milliseconds, filled in by the threads. */
long mtime_write, mtime_read;
/*
 * Receive thread: reads rcvsize bytes from readFd into a scratch buffer and
 * stores the elapsed wall-clock time, in milliseconds, in mtime_read.
 *
 * arg is unused; the function always returns NULL.
 *
 * mtime_read is written only when all rcvsize bytes have arrived. On
 * allocation failure, read error or end-of-file the problem is reported on
 * stderr and mtime_read is left untouched.
 */
void* rcvThread(void* arg)
{
    ssize_t rc;
    long bytes_read = 0;
    long seconds, useconds;
    char *buff;

    (void)arg;
    rcv_started = 1;

    buff = malloc(rcvsize);
    if (buff == NULL) {
        perror("Cannot allocate memory for recieve");
        /* Nothing to read into: bail out instead of passing NULL to read(). */
        return NULL;
    }
#ifdef DEBUG_MODE
    printf("Read thread started!\n");
    fflush(stdout);
#endif
    gettimeofday(&start_read, NULL);

    while (bytes_read < rcvsize) {
#ifdef DEBUG_MODE
        printf("Bytes read: %ld \n", bytes_read);
#endif
        rc = read(readFd, buff + bytes_read, rcvsize - bytes_read);
        if (rc < 0 && errno == EINTR) {
            continue;   /* interrupted by a signal: just retry */
        }
        if (rc < 0) {
            perror("read() failed");
            break;
        }
        if (rc == 0) {
            fprintf(stderr, "Reached read EOF.\n");
            break;
        }
        /* Only a successful read may advance the position. */
        bytes_read += rc;
    }

    if (bytes_read >= rcvsize) {
        /* count elapsed time, rounded to the nearest millisecond */
        gettimeofday(&end_read, NULL);
        seconds = end_read.tv_sec - start_read.tv_sec;
        useconds = end_read.tv_usec - start_read.tv_usec;
        mtime_read = ((seconds) * 1000 + useconds/1000.0) + 0.5;
#ifdef DEBUG_MODE
        printf("Data read done!");
        fflush(stdout);
#endif
    }

    free(buff);
    return NULL;
}
/*
 * Send thread: writes the memsize bytes at mem to writeFd, issues a
 * zero-length write() afterwards (used here as a flush request), and stores
 * the elapsed wall-clock time, in milliseconds, in mtime_write.
 *
 * arg is unused; the function always returns NULL.
 *
 * mtime_write is computed even when the transfer stopped early because of a
 * write error, which is reported on stderr.
 */
void* sndThread(void* arg)
{
    ssize_t rc;
    long seconds, useconds;
    long bytes_written = 0;

    (void)arg;
    snd_started = 1;
    gettimeofday(&start_write, NULL);

    while (bytes_written < memsize) {
#ifdef DEBUG_MODE
        printf("Bytes: %ld \n", bytes_written);
#endif
        rc = write(writeFd, mem + bytes_written, memsize - bytes_written);
        if (rc < 0 && errno == EINTR) {
            continue;   /* interrupted by a signal: just retry */
        }
        if (rc < 0) {
            perror("write() failed");
            break;
        }
        if (rc == 0) {
            fprintf(stderr, "Reached write EOF (?!)\n");
            break;
        }
        bytes_written += rc;
    }

    /* flushing: zero-length write, retried while interrupted by a signal */
    do {
        rc = write(writeFd, mem + bytes_written, 0);
    } while (rc < 0 && errno == EINTR);
    if (rc < 0) {
        perror("Flush failed!");
        printf("bytes: %ld\n\n", bytes_written);
    }

    /* elapsed time, rounded to the nearest millisecond */
    gettimeofday(&end_write, NULL);
    seconds = end_write.tv_sec - start_write.tv_sec;
    useconds = end_write.tv_usec - start_write.tv_usec;
    mtime_write = ((seconds) * 1000 + useconds/1000.0) + 0.5;
#ifdef DEBUG_MODE
    printf("Writing finished!: %ld written", bytes_written);
    fflush(stdout);
#endif
    return NULL;
}
/*
 * Entry point. Usage: prog write_file read_file
 *
 * Opens argv[1] for writing and argv[2] for reading, allocates the memsize
 * byte payload buffer, then runs the send thread to completion followed by
 * the receive thread, printing the time each phase took.
 *
 * Returns 0 on success; exits with status 1 on a usage error or when a
 * file, the buffer or a thread cannot be set up.
 *
 * NOTE(review): the two phases run strictly one after the other (the send
 * thread is joined before the receive thread is created) — confirm this is
 * the intended measurement rather than a concurrent send/receive.
 */
int main(int argc, char* argv[])
{
    pthread_t rcvThreadId, sndThreadId;
    int err;

    if (argc != 3) {
        printf("Usage: %s write_file read_file\n", argv[0]);
        exit(1);
    }

    writeFd = open(argv[1], O_WRONLY);
    if (writeFd < 0) {
        perror("Failed to open write file");
        exit(1);
    }
    readFd = open(argv[2], O_RDONLY);
    if (readFd < 0) {
        perror("Failed to open read file");
        exit(1);
    }

    mem = malloc(memsize);
    if (mem == NULL) {
        perror("Failed to allocate memory");
        exit(1);
    }

    /*
     * pthread_create() reports failure through its return value and does not
     * set errno, so copy the code into errno before calling perror().
     */
    err = pthread_create(&sndThreadId, NULL, sndThread, NULL);
    if (err != 0) {
        errno = err;
        perror("Failed to create send thread");
        exit(1);
    }
    pthread_join(sndThreadId, NULL);
    printf("Write time: %ld mili\n", mtime_write);
    fflush(stdout);

    err = pthread_create(&rcvThreadId, NULL, rcvThread, NULL);
    if (err != 0) {
        errno = err;
        perror("Failed to create recieve thread");
        exit(1);
    }
    pthread_join(rcvThreadId, NULL);
    printf("Recieve time: %ld mili\n", mtime_read);
    fflush(stdout);

    /* Both workers are done: release the buffer and the descriptors. */
    free(mem);
    mem = NULL;
    close(readFd);
    close(writeFd);
    return 0;
}
FPGA code (the part that sends data back for my device):
- Code: Select all
// Word counter for the upload stream.
//
// Counts the clock cycles in which user_w_md_download_wren is high and
// pulses upload_fifo_wr_en for exactly one bus_clk cycle on every 40000th
// counted cycle, restarting the count from 0. The counter is held at 0
// while both user_r_md_upload_open and user_w_md_download_open are low.
//
// Non-blocking assignments are used throughout: upload_fifo_wr_en is sampled
// by the md_upload FIFO on the same clock edge, and blocking assignments in a
// clocked block make the simulated result depend on process evaluation order.
//
// NOTE(review): assumes counter never holds 40000 between clock edges (true
// once it has started from 0); the original compared the freshly incremented
// value against 40000, which is the same as comparing the stored value
// against 39999 in a cycle with wren high.
always @ (posedge bus_clk)
begin
    upload_fifo_wr_en <= 1'b0;                 // default: no write this cycle
    if (!user_r_md_upload_open && !user_w_md_download_open)
        counter <= 0;
    else if (user_w_md_download_wren == 1'b1) begin
        if (counter == 40000 - 1) begin        // this is the 40000th word
            counter <= 0;
            upload_fifo_wr_en <= 1'b1;
        end
        else
            counter <= counter + 1;
    end
end
// The upload stream's end-of-file signal is tied low.
assign user_r_md_upload_eof = 1'b0;

// FIFO feeding the upload stream. Each write pushes the constant 7; the read
// side is driven by the user_r_md_upload_* signals.
fifo_8x2048 md_upload
(
    .clk   (bus_clk),
    // Synchronous reset while neither stream is open.
    .srst  (!(user_r_md_upload_open || user_w_md_download_open)),

    // Write side
    .din   (7),
    .wr_en (upload_fifo_wr_en),
    // NOTE(review): this FIFO's full flag drives the *download* stream's
    // full signal — presumably as back-pressure on the host writes; confirm.
    .full  (user_w_md_download_full),

    // Read side
    .rd_en (user_r_md_upload_rden),
    .dout  (user_r_md_upload_data),
    .empty (user_r_md_upload_empty)
);