by Guest »
Hello,
I am working with my Xillybus on my platform with a PCIe 1x, and my performance measurement is beyond the peak!!
Here is the situation:
I have a loopback application in C that writes a large array to the device and reads it back (I make sure I read all of the elements back completely and correctly). When I do this once the measurement gives me 80 MB/s (which can be logical) but when I do this in a loop and calculate the average time, it gives me around 800 MB/s which is simply impossible on a PCIe 1x. I also re-intialize the array (change all the values) as a precaution for caching effects. I know I'm missing something, but I can't see what! Does anyone has any idea what I might be missing?
A simple version of my code:
- Code: Select all
for (i=0; i<EVAL_RUNS; i++) {
// Initialize buffer
int j;
for(j=0; j<BUFF_SIZE; j++) buf[j] = 'a'+i+j%70;
gettimeofday(&start_w, NULL);
// Write to device
allwrite(fdw, buf, BUFF_SIZE); // WRITE ALL
// Read from device
rc = allread(fdr, buf2, BUFF_SIZE); // READ ALL
gettimeofday(&end_r, NULL);
// Read everything remained just in case
sleep(1); // just in case!
allread(fdr, buf2+rc, BUFF_SIZE-rc);
// Check integrity
for(i=0; i<rc; i++) if(buf[i] != buf2[i]) printf("Mismatch: %c <-> %c\n", buf[i], buf2[i]);
// ERROR CHECKING
if ((rc < 0) && (errno == EINTR))
exit(0);
if (rc < 0) {
perror("allread() failed to read");
exit(1);
}
if (rc == 0) {
fprintf(stderr, "Reached read EOF.\n");
exit(0);
}
sum_wr += ((end_r.tv_sec * 1000000 + end_r.tv_usec)
- (start_w.tv_sec * 1000000 + start_w.tv_usec));
}
long long cpu_time_wr = sum_wr/EVAL_RUNS;
Hello,
I am working with my Xillybus on my platform with a PCIe 1x, and my performance measurement is beyond the peak!!
Here is the situation:
I have a loopback application in C that writes a large array to the device and reads it back (I make sure I read all of the elements back completely and correctly). When I do this once the measurement gives me 80 MB/s (which can be logical) but when I do this in a loop and calculate the average time, it gives me around 800 MB/s which is simply impossible on a PCIe 1x. I also re-intialize the array (change all the values) as a precaution for caching effects. I know I'm missing something, but I can't see what! Does anyone has any idea what I might be missing?
A simple version of my code:
[code]
for (i=0; i<EVAL_RUNS; i++) {
// Initialize buffer
int j;
for(j=0; j<BUFF_SIZE; j++) buf[j] = 'a'+i+j%70;
gettimeofday(&start_w, NULL);
// Write to device
allwrite(fdw, buf, BUFF_SIZE); // WRITE ALL
// Read from device
rc = allread(fdr, buf2, BUFF_SIZE); // READ ALL
gettimeofday(&end_r, NULL);
// Read everything remained just in case
sleep(1); // just in case!
allread(fdr, buf2+rc, BUFF_SIZE-rc);
// Check integrity
for(i=0; i<rc; i++) if(buf[i] != buf2[i]) printf("Mismatch: %c <-> %c\n", buf[i], buf2[i]);
// ERROR CHECKING
if ((rc < 0) && (errno == EINTR))
exit(0);
if (rc < 0) {
perror("allread() failed to read");
exit(1);
}
if (rc == 0) {
fprintf(stderr, "Reached read EOF.\n");
exit(0);
}
sum_wr += ((end_r.tv_sec * 1000000 + end_r.tv_usec)
- (start_w.tv_sec * 1000000 + start_w.tv_usec));
}
long long cpu_time_wr = sum_wr/EVAL_RUNS;
[/code]