by Guest »
Hi again,
Thanks for your reply.I am really excited to hear that you'll be releasing revised HLS bundles.This would hopefully make things easier for people like me in new projects.Great initiative!!
Now, I did try out your suggestions related to improving my code.And it increased some number of iterations.I have moved the code for opening fifos outside the infinite loops.I don't close any fifos inside loops.I have also removed the intermediate buf variable and used the framepointer directly in the loop that writes data(to FPGA and to the fifo between parent and child).
However, my webcam feed still freezes after a given number of iterations.I put a cout statement in the loop that writes to the FPGA in the parent process and this is where the process stops every time.Here is the updated code from the previous post.
- Code: Select all
[code]
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#define MAX_COUNT 10
#include <iostream>
#include <stdlib.h>
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
using namespace cv;
using namespace std;
int i = 0;
VideoCapture cap(0);
void ChildProcess(void); /* child process prototype */
void ParentProcess(void); /* parent process prototype */
int main()
{
pid_t pid;
pid = fork();
if (pid != 0)
ChildProcess();
else
ParentProcess();
}
void ChildProcess(void)
{
int rows = 500;
int cols = 500;
int nchan = 1;
int fd2 = open("/dev/xillybus_read_32",O_RDONLY);
int fd3 = open("/root/vidpipe",O_WRONLY);
for(;;)
{
Mat img(rows,cols,CV_8UC1);
///Reading the result from FPGA here
int totalbytes = rows*cols*nchan;
int buflen = cols*nchan;
int ret;
uchar buf[buflen];
uchar datarray[totalbytes];
int j;
int k = 0;
int num = totalbytes/buflen;
int bread = 0;
while(bread<totalbytes)
{
ret=read(fd2,buf,buflen);
for ( j = 0 ; j<= (ret-1);j++ )
{
datarray[j+k] = buf[j];
}
k = k+ret;
bread = bread+ret;
}
img.data = datarray;
///Sending results back to the parent here ///
uchar *framepointer = img.data;
int bwritten = 0;
int ret2;
while(bwritten<totalbytes)
{
ret2 = write(fd3,framepointer,buflen);
framepointer = framepointer + ret2;
bwritten = bwritten+ret2;
}
}
}
void ParentProcess(void)
{
int check;
int totalbytes;
int buflen;
int count = 0;
int rows = 500;
int cols = 500;
int nchan = 1;
int fd = open("/dev/xillybus_write_32",O_WRONLY);
if (fd < 1)
{
cout<<"open failed"<<endl;
}
int fd1 = open("/root/vidpipe",O_RDONLY);
for(;;)
{
i++;
cout<<"Parent "<<i<<endl;
Mat img(rows,cols,CV_8UC1);
Mat frame;
cap >> frame;
totalbytes = frame.total()*frame.elemSize();
buflen = cols*nchan;
uchar *framepointer = frame.data;
int bwritten = 0;
int ret;
///Writing Input image to FPGA here///
/*** This loop is where the process gets stuck ***/
while(bwritten<totalbytes)
{
ret = write(fd,framepointer,buflen);
framepointer = framepointer + ret;
bwritten = bwritten+ret;
}
//having or removing this has apparently no effect
write(fd,NULL,0);
/// Receive binary map from child here ///
totalbytes = rows*cols*nchan;
int ret2;
uchar buf2[buflen];
uchar datarray[totalbytes];
int j;
int k = 0;
int bread = 0;
while(bread<totalbytes)
{
ret2=read(fd1,buf2,buflen);
for ( j = 0 ; j<= (ret2-1);j++ )
{
datarray[j+k] = buf2[j];
}
k = k+ret2;
bread = bread+ret2;
}
img.data = datarray;
// Overlay results on the original image here
for( int p = 1; p <= img.rows; p++ )
{ for( int q = 1; q <= img.cols; q++ )
{
if( img.at<uchar>(p,q) == 1 )
{
circle( frame, Point( p, q ), 5, Scalar(255,0,0), 2, 8, 0 );
}
}
}
imshow( "Received image in parent", frame );
waitKey(1);
frame.release();
}
}
[/code]
So, I am pretty sure that it is due to the lack of flow control handling in the c-code for FPGA that I created in Vivado HLS.
Here, is my code for that too.
- Code: Select all
#include <math.h>
#include <stdint.h>
#include "xilly_debug.h"
#include "ap_cint.h"
#include "ap_utils.h"
void xillybus_wrapper(int *in, int *out) {
#pragma AP interface ap_fifo port=in
#pragma AP interface ap_fifo port=out
#pragma AP interface ap_ctrl_none port=return
int x1, x2, x3;
uint8_t y1,y2,y3,y4;
int res;
uint8_t bytes[12];
int thresh = 1000;
float k = 0.04;
const int rows = 240; const int cols = 240;
uint8 rfirst[cols + 2] = {0}; uint8 rproc[cols + 2] = {0}; uint8 rnext[cols + 2] = { 0 }; uint8 rcurr[cols+2] = { 0 };
int ix2_1[cols + 2] = { 0 }; int ix2_2[cols + 2] = { 0 }; int ix2_3[cols + 2] = { 0 };
int iy2_1[cols + 2] = { 0 }; int iy2_2[cols + 2] = { 0 }; int iy2_3[cols + 2] = { 0 };
int ixy_1[cols + 2] = { 0 }; int ixy_2[cols + 2] = { 0 }; int ixy_3[cols + 2] = { 0 };
float ix2_f[cols] = { 0 }; float iy2_f[cols] = { 0 }; float ixy_f[cols] = { 0 };
float r;float cimg_1[cols + 2] = { 0 }; float cimg_2[cols + 2] = { 0 }; float cimg_3[cols + 2] = { 0 };
uint8 output[cols] = { 0 }; int ix; int iy;int ct;
for (int i = 1; i <= (rows + 2); i++)
{
//Read rows and covert to grayscale--for i = 1 load two rows else load 1 row each time.
//At any instant there are 3 rows available for each calculation i.e horizontal and vertical
//derivatives, smoothed derivatives,and corner maps before and after non-maxima suppression.
if (i == 1)
{
for (int u=1;u<=(cols)/4;u++)
{
x1 = *in++;
bytes[0] = (x1 >> 24) & 0xFF;
bytes[1] = (x1 >> 16) & 0xFF;
bytes[2] = (x1 >> 8) & 0xFF;
bytes[3] = x1 & 0xFF;
x2 = *in++;
bytes[4] = (x2 >> 24) & 0xFF;
bytes[5] = (x2 >> 16) & 0xFF;
bytes[6] = (x2 >> 8) & 0xFF;
bytes[7] = x2 & 0xFF;
x3 = *in++;
bytes[8] = (x3 >> 24) & 0xFF;
bytes[9] = (x3 >> 16) & 0xFF;
bytes[10] = (x3 >> 8) & 0xFF;
bytes[11] = x3 & 0xFF;
rproc[ct] = (bytes[0] + bytes[1] + bytes[2])/3;
rproc[ct+1] = (bytes[3] + bytes[7] + bytes[6])/3;
rproc[ct+2] = (bytes[5]+bytes[4]+bytes[11])/3;
rproc[ct+3] = (bytes[10]+bytes[9]+bytes[8])/3;
ct = ct + 4;
}
ct = 1;
for (int v=1;v<=(cols)/4;v++)
{
x1 = *in++;
bytes[0] = (x1 >> 24) & 0xFF;
bytes[1] = (x1 >> 16) & 0xFF;
bytes[2] = (x1 >> 8) & 0xFF;
bytes[3] = x1 & 0xFF;
x2 = *in++;
bytes[4] = (x2 >> 24) & 0xFF;
bytes[5] = (x2 >> 16) & 0xFF;
bytes[6] = (x2 >> 8) & 0xFF;
bytes[7] = x2 & 0xFF;
x3 = *in++;
bytes[8] = (x3 >> 24) & 0xFF;
bytes[9] = (x3 >> 16) & 0xFF;
bytes[10] = (x3 >> 8) & 0xFF;
bytes[11] = x3 & 0xFF;
rnext[ct] = (bytes[0] + bytes[1] + bytes[2])/3;
rnext[ct+1] = (bytes[3] + bytes[7] + bytes[6])/3;
rnext[ct+2] = (bytes[5]+bytes[4]+bytes[11])/3;
rnext[ct+3] = (bytes[10]+bytes[9]+bytes[8])/3;
ct = ct + 4;
}
}
else if (i > 1 && i <= (rows - 1))
{
ct = 1;
//here load one row from the RGB to Grayscale module
for (int r = 1; r <= (cols)/4; r++)
{
x1 = *in++;
bytes[0] = (x1 >> 24) & 0xFF;
bytes[1] = (x1 >> 16) & 0xFF;
bytes[2] = (x1 >> 8) & 0xFF;
bytes[3] = x1 & 0xFF;
x2 = *in++;
bytes[4] = (x2 >> 24) & 0xFF;
bytes[5] = (x2 >> 16) & 0xFF;
bytes[6] = (x2 >> 8) & 0xFF;
bytes[7] = x2 & 0xFF;
x3 = *in++;
bytes[8] = (x3 >> 24) & 0xFF;
bytes[9] = (x3 >> 16) & 0xFF;
bytes[10] = (x3 >> 8) & 0xFF;
bytes[11] = x3 & 0xFF;
rcurr[ct] = (bytes[0] + bytes[1] + bytes[2])/3;
rcurr[ct+1] = (bytes[3] + bytes[7] + bytes[6])/3;
rcurr[ct+2] = (bytes[5]+bytes[4]+bytes[11])/3;
rcurr[ct+3] = (bytes[10]+bytes[9]+bytes[8])/3;
ct = ct + 4;
}
for (int j = 0; j <= cols - 1; j++)
{
rfirst[j + 1] = rproc[j + 1];
rproc[j + 1] = rnext[j + 1];
rnext[j + 1] = rcurr[j+1];
}
}
else if (i == rows)
{
for (int j = 0; j <= cols - 1; j++)
{
rfirst[j + 1] = rproc[j + 1];
rproc[j + 1] = rnext[j + 1];
rnext[j + 1] = 0;
}
}
//Calculating horizontal and vertical derivatives ix,iy,ix2,iy2 and ixy
if (i >= 1 && i <= rows)
{
for (int j = 0; j <= cols - 1; j++)
{
ix2_1[j + 1] = ix2_2[j + 1]; ix2_2[j + 1] = ix2_3[j + 1];
iy2_1[j + 1] = iy2_2[j + 1]; iy2_2[j + 1] = iy2_3[j + 1];
ixy_1[j + 1] = ixy_2[j + 1]; ixy_2[j + 1] = ixy_3[j + 1];
}
for (int m = 0; m <= (cols - 1); m++)
{
ix = abs(rfirst[m] - rfirst[m + 2] + rproc[m] - rproc[m + 2] + rnext[m] - rnext[m + 2]);
iy = abs(rfirst[m] - rnext[m] + rfirst[m + 1] - rnext[m + 1] + rfirst[m + 2] - rnext[m + 2]);
ix2_3[m + 1] = pow(ix,2);
iy2_3[m + 1] = pow(iy,2);
ixy_3[m + 1] = ix*iy;
}
}
else if (i == rows + 1)
{
for (int j = 0; j <= cols - 1; j++)
{
ix2_1[j + 1] = ix2_2[j + 1]; ix2_2[j + 1] = ix2_3[j + 1];
iy2_1[j + 1] = iy2_2[j + 1]; iy2_2[j + 1] = iy2_3[j + 1];
ixy_1[j + 1] = ixy_2[j + 1]; ixy_2[j + 1] = ixy_3[j + 1];
ix2_3[j + 1] = 0; iy2_3[j + 1] = 0; ixy_3[j + 1] = 0;
}
}
//filtering ix2,iy2 and ixy
if (i > 1 && i <= rows + 1)
{
for (int j = 0; j <= cols - 1; j++)
{
cimg_1[j + 1] = cimg_2[j + 1]; cimg_2[j + 1] = cimg_3[j + 1];
cimg_3[j + 1] = 0;
}
for (int m = 1; m <= cols; m++)
{
ix2_f[m - 1] = (0.0113*ix2_1[m - 1] + 0.0838*ix2_1[m] + 0.0113*ix2_1[m + 1] + 0.0838*ix2_2[m - 1] + 0.6193*ix2_2[m] + 0.0838*ix2_2[m + 1] + 0.0113*ix2_3[m - 1] + 0.0838*ix2_3[m] + 0.0113*ix2_3[m + 1]);
iy2_f[m - 1] = (0.0113*iy2_1[m - 1] + 0.0838*iy2_1[m] + 0.0113*iy2_1[m + 1] + 0.0838*iy2_2[m - 1] + 0.6193*iy2_2[m] + 0.0838*iy2_2[m + 1] + 0.0113*iy2_3[m - 1] + 0.0838*iy2_3[m] + 0.0113*iy2_3[m + 1]);
ixy_f[m - 1] = (0.0113*ixy_1[m - 1] + 0.0838*ixy_1[m] + 0.0113*ixy_1[m + 1] + 0.0838*ixy_2[m - 1] + 0.6193*ixy_2[m] + 0.0838*ixy_2[m + 1] + 0.0113*ixy_3[m - 1] + 0.0838*ixy_3[m] + 0.0113*ixy_3[m + 1]);
r = (((ix2_f[m - 1])*( iy2_f[m - 1]) - pow(ixy_f[m - 1],2)) - k*pow((ix2_f[m - 1] + iy2_f[m - 1]),2) );
if (r > thresh)
{
cimg_3[m] = r;
}
}
}
else if (i == rows + 2)
{
for (int j = 0; j <= cols - 1; j++)
{
cimg_1[j + 1] = cimg_2[j + 1]; cimg_2[j + 1] = cimg_3[j + 1];
cimg_3[j + 1] = 0;
}
}
//non maxima suppression of corner map
if (i > 2)
{
for (int m = 1; m <= cols; m++)
{
if (!((cimg_2[m] > cimg_2[m - 1]) && (cimg_2[m] > cimg_2[m + 1]) && (cimg_2[m] > cimg_1[m - 1]) && (cimg_2[m] > cimg_1[m]) && (cimg_2[m] > cimg_1[m + 1]) && (cimg_2[m] > cimg_3[m - 1]) && (cimg_2[m] > cimg_3[m]) && (cimg_2[m] > cimg_3[m + 1])))
{
output[m - 1] = 0;
}
else
{
output[m - 1] = 1;
}
}
ct = 0;
for (int p=1;p<=125;p++)
{
//packing output bytes to an int
res = (output[ct+3] << 24) | (output[ct+2] << 16) | (output[ct+1] << 8) | output[ct];
*out++ = res;
ct = ct+4;
}
}
}
}
Again,please give me a hint about what to do about flow handling here.This might sound silly,but the reason I am asking for hints over and over is that the way I am doing this project is to learn things on the go as they come up.
Another thing is that according to your suggestion I looked at the output from top in a separate terminal while my main program was running and it showed that this process was taking up >= 95% of the CPU for the entire time.So, could the problem be due to this too?
What I am trying to do here is to identify the point (FPGA or Linux) where my problem lies.Once that is done I can take it up from there.
Thanks in advance.
Hi again,
Thanks for your reply.I am really excited to hear that you'll be releasing revised HLS bundles.This would hopefully make things easier for people like me in new projects.Great initiative!!
Now, I did try out your suggestions related to improving my code.And it increased some number of iterations.I have moved the code for opening fifos outside the infinite loops.I don't close any fifos inside loops.I have also removed the intermediate buf variable and used the framepointer directly in the loop that writes data(to FPGA and to the fifo between parent and child).
However, my webcam feed still freezes after a given number of iterations.I put a cout statement in the loop that writes to the FPGA in the parent process and this is where the process stops every time.Here is the updated code from the previous post.
[code][code]
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#define MAX_COUNT 10
#include <iostream>
#include <stdlib.h>
#include <stdio.h>
#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
using namespace cv;
using namespace std;
int i = 0;
VideoCapture cap(0);
void ChildProcess(void); /* child process prototype */
void ParentProcess(void); /* parent process prototype */
int main()
{
pid_t pid;
pid = fork();
if (pid != 0)
ChildProcess();
else
ParentProcess();
}
void ChildProcess(void)
{
int rows = 500;
int cols = 500;
int nchan = 1;
int fd2 = open("/dev/xillybus_read_32",O_RDONLY);
int fd3 = open("/root/vidpipe",O_WRONLY);
for(;;)
{
Mat img(rows,cols,CV_8UC1);
///Reading the result from FPGA here
int totalbytes = rows*cols*nchan;
int buflen = cols*nchan;
int ret;
uchar buf[buflen];
uchar datarray[totalbytes];
int j;
int k = 0;
int num = totalbytes/buflen;
int bread = 0;
while(bread<totalbytes)
{
ret=read(fd2,buf,buflen);
for ( j = 0 ; j<= (ret-1);j++ )
{
datarray[j+k] = buf[j];
}
k = k+ret;
bread = bread+ret;
}
img.data = datarray;
///Sending results back to the parent here ///
uchar *framepointer = img.data;
int bwritten = 0;
int ret2;
while(bwritten<totalbytes)
{
ret2 = write(fd3,framepointer,buflen);
framepointer = framepointer + ret2;
bwritten = bwritten+ret2;
}
}
}
void ParentProcess(void)
{
int check;
int totalbytes;
int buflen;
int count = 0;
int rows = 500;
int cols = 500;
int nchan = 1;
int fd = open("/dev/xillybus_write_32",O_WRONLY);
if (fd < 1)
{
cout<<"open failed"<<endl;
}
int fd1 = open("/root/vidpipe",O_RDONLY);
for(;;)
{
i++;
cout<<"Parent "<<i<<endl;
Mat img(rows,cols,CV_8UC1);
Mat frame;
cap >> frame;
totalbytes = frame.total()*frame.elemSize();
buflen = cols*nchan;
uchar *framepointer = frame.data;
int bwritten = 0;
int ret;
///Writing Input image to FPGA here///
/*** This loop is where the process gets stuck ***/
while(bwritten<totalbytes)
{
ret = write(fd,framepointer,buflen);
framepointer = framepointer + ret;
bwritten = bwritten+ret;
}
//having or removing this has apparently no effect
write(fd,NULL,0);
/// Receive binary map from child here ///
totalbytes = rows*cols*nchan;
int ret2;
uchar buf2[buflen];
uchar datarray[totalbytes];
int j;
int k = 0;
int bread = 0;
while(bread<totalbytes)
{
ret2=read(fd1,buf2,buflen);
for ( j = 0 ; j<= (ret2-1);j++ )
{
datarray[j+k] = buf2[j];
}
k = k+ret2;
bread = bread+ret2;
}
img.data = datarray;
// Overlay results on the original image here
for( int p = 1; p <= img.rows; p++ )
{ for( int q = 1; q <= img.cols; q++ )
{
if( img.at<uchar>(p,q) == 1 )
{
circle( frame, Point( p, q ), 5, Scalar(255,0,0), 2, 8, 0 );
}
}
}
imshow( "Received image in parent", frame );
waitKey(1);
frame.release();
}
}
[/code][/code]
So, I am pretty sure that it is due to the lack of flow control handling in the c-code for FPGA that I created in Vivado HLS.
Here, is my code for that too.
[code]
#include <math.h>
#include <stdint.h>
#include "xilly_debug.h"
#include "ap_cint.h"
#include "ap_utils.h"
void xillybus_wrapper(int *in, int *out) {
#pragma AP interface ap_fifo port=in
#pragma AP interface ap_fifo port=out
#pragma AP interface ap_ctrl_none port=return
int x1, x2, x3;
uint8_t y1,y2,y3,y4;
int res;
uint8_t bytes[12];
int thresh = 1000;
float k = 0.04;
const int rows = 240; const int cols = 240;
uint8 rfirst[cols + 2] = {0}; uint8 rproc[cols + 2] = {0}; uint8 rnext[cols + 2] = { 0 }; uint8 rcurr[cols+2] = { 0 };
int ix2_1[cols + 2] = { 0 }; int ix2_2[cols + 2] = { 0 }; int ix2_3[cols + 2] = { 0 };
int iy2_1[cols + 2] = { 0 }; int iy2_2[cols + 2] = { 0 }; int iy2_3[cols + 2] = { 0 };
int ixy_1[cols + 2] = { 0 }; int ixy_2[cols + 2] = { 0 }; int ixy_3[cols + 2] = { 0 };
float ix2_f[cols] = { 0 }; float iy2_f[cols] = { 0 }; float ixy_f[cols] = { 0 };
float r;float cimg_1[cols + 2] = { 0 }; float cimg_2[cols + 2] = { 0 }; float cimg_3[cols + 2] = { 0 };
uint8 output[cols] = { 0 }; int ix; int iy;int ct;
for (int i = 1; i <= (rows + 2); i++)
{
//Read rows and covert to grayscale--for i = 1 load two rows else load 1 row each time.
//At any instant there are 3 rows available for each calculation i.e horizontal and vertical
//derivatives, smoothed derivatives,and corner maps before and after non-maxima suppression.
if (i == 1)
{
for (int u=1;u<=(cols)/4;u++)
{
x1 = *in++;
bytes[0] = (x1 >> 24) & 0xFF;
bytes[1] = (x1 >> 16) & 0xFF;
bytes[2] = (x1 >> 8) & 0xFF;
bytes[3] = x1 & 0xFF;
x2 = *in++;
bytes[4] = (x2 >> 24) & 0xFF;
bytes[5] = (x2 >> 16) & 0xFF;
bytes[6] = (x2 >> 8) & 0xFF;
bytes[7] = x2 & 0xFF;
x3 = *in++;
bytes[8] = (x3 >> 24) & 0xFF;
bytes[9] = (x3 >> 16) & 0xFF;
bytes[10] = (x3 >> 8) & 0xFF;
bytes[11] = x3 & 0xFF;
rproc[ct] = (bytes[0] + bytes[1] + bytes[2])/3;
rproc[ct+1] = (bytes[3] + bytes[7] + bytes[6])/3;
rproc[ct+2] = (bytes[5]+bytes[4]+bytes[11])/3;
rproc[ct+3] = (bytes[10]+bytes[9]+bytes[8])/3;
ct = ct + 4;
}
ct = 1;
for (int v=1;v<=(cols)/4;v++)
{
x1 = *in++;
bytes[0] = (x1 >> 24) & 0xFF;
bytes[1] = (x1 >> 16) & 0xFF;
bytes[2] = (x1 >> 8) & 0xFF;
bytes[3] = x1 & 0xFF;
x2 = *in++;
bytes[4] = (x2 >> 24) & 0xFF;
bytes[5] = (x2 >> 16) & 0xFF;
bytes[6] = (x2 >> 8) & 0xFF;
bytes[7] = x2 & 0xFF;
x3 = *in++;
bytes[8] = (x3 >> 24) & 0xFF;
bytes[9] = (x3 >> 16) & 0xFF;
bytes[10] = (x3 >> 8) & 0xFF;
bytes[11] = x3 & 0xFF;
rnext[ct] = (bytes[0] + bytes[1] + bytes[2])/3;
rnext[ct+1] = (bytes[3] + bytes[7] + bytes[6])/3;
rnext[ct+2] = (bytes[5]+bytes[4]+bytes[11])/3;
rnext[ct+3] = (bytes[10]+bytes[9]+bytes[8])/3;
ct = ct + 4;
}
}
else if (i > 1 && i <= (rows - 1))
{
ct = 1;
//here load one row from the RGB to Grayscale module
for (int r = 1; r <= (cols)/4; r++)
{
x1 = *in++;
bytes[0] = (x1 >> 24) & 0xFF;
bytes[1] = (x1 >> 16) & 0xFF;
bytes[2] = (x1 >> 8) & 0xFF;
bytes[3] = x1 & 0xFF;
x2 = *in++;
bytes[4] = (x2 >> 24) & 0xFF;
bytes[5] = (x2 >> 16) & 0xFF;
bytes[6] = (x2 >> 8) & 0xFF;
bytes[7] = x2 & 0xFF;
x3 = *in++;
bytes[8] = (x3 >> 24) & 0xFF;
bytes[9] = (x3 >> 16) & 0xFF;
bytes[10] = (x3 >> 8) & 0xFF;
bytes[11] = x3 & 0xFF;
rcurr[ct] = (bytes[0] + bytes[1] + bytes[2])/3;
rcurr[ct+1] = (bytes[3] + bytes[7] + bytes[6])/3;
rcurr[ct+2] = (bytes[5]+bytes[4]+bytes[11])/3;
rcurr[ct+3] = (bytes[10]+bytes[9]+bytes[8])/3;
ct = ct + 4;
}
for (int j = 0; j <= cols - 1; j++)
{
rfirst[j + 1] = rproc[j + 1];
rproc[j + 1] = rnext[j + 1];
rnext[j + 1] = rcurr[j+1];
}
}
else if (i == rows)
{
for (int j = 0; j <= cols - 1; j++)
{
rfirst[j + 1] = rproc[j + 1];
rproc[j + 1] = rnext[j + 1];
rnext[j + 1] = 0;
}
}
//Calculating horizontal and vertical derivatives ix,iy,ix2,iy2 and ixy
if (i >= 1 && i <= rows)
{
for (int j = 0; j <= cols - 1; j++)
{
ix2_1[j + 1] = ix2_2[j + 1]; ix2_2[j + 1] = ix2_3[j + 1];
iy2_1[j + 1] = iy2_2[j + 1]; iy2_2[j + 1] = iy2_3[j + 1];
ixy_1[j + 1] = ixy_2[j + 1]; ixy_2[j + 1] = ixy_3[j + 1];
}
for (int m = 0; m <= (cols - 1); m++)
{
ix = abs(rfirst[m] - rfirst[m + 2] + rproc[m] - rproc[m + 2] + rnext[m] - rnext[m + 2]);
iy = abs(rfirst[m] - rnext[m] + rfirst[m + 1] - rnext[m + 1] + rfirst[m + 2] - rnext[m + 2]);
ix2_3[m + 1] = pow(ix,2);
iy2_3[m + 1] = pow(iy,2);
ixy_3[m + 1] = ix*iy;
}
}
else if (i == rows + 1)
{
for (int j = 0; j <= cols - 1; j++)
{
ix2_1[j + 1] = ix2_2[j + 1]; ix2_2[j + 1] = ix2_3[j + 1];
iy2_1[j + 1] = iy2_2[j + 1]; iy2_2[j + 1] = iy2_3[j + 1];
ixy_1[j + 1] = ixy_2[j + 1]; ixy_2[j + 1] = ixy_3[j + 1];
ix2_3[j + 1] = 0; iy2_3[j + 1] = 0; ixy_3[j + 1] = 0;
}
}
//filtering ix2,iy2 and ixy
if (i > 1 && i <= rows + 1)
{
for (int j = 0; j <= cols - 1; j++)
{
cimg_1[j + 1] = cimg_2[j + 1]; cimg_2[j + 1] = cimg_3[j + 1];
cimg_3[j + 1] = 0;
}
for (int m = 1; m <= cols; m++)
{
ix2_f[m - 1] = (0.0113*ix2_1[m - 1] + 0.0838*ix2_1[m] + 0.0113*ix2_1[m + 1] + 0.0838*ix2_2[m - 1] + 0.6193*ix2_2[m] + 0.0838*ix2_2[m + 1] + 0.0113*ix2_3[m - 1] + 0.0838*ix2_3[m] + 0.0113*ix2_3[m + 1]);
iy2_f[m - 1] = (0.0113*iy2_1[m - 1] + 0.0838*iy2_1[m] + 0.0113*iy2_1[m + 1] + 0.0838*iy2_2[m - 1] + 0.6193*iy2_2[m] + 0.0838*iy2_2[m + 1] + 0.0113*iy2_3[m - 1] + 0.0838*iy2_3[m] + 0.0113*iy2_3[m + 1]);
ixy_f[m - 1] = (0.0113*ixy_1[m - 1] + 0.0838*ixy_1[m] + 0.0113*ixy_1[m + 1] + 0.0838*ixy_2[m - 1] + 0.6193*ixy_2[m] + 0.0838*ixy_2[m + 1] + 0.0113*ixy_3[m - 1] + 0.0838*ixy_3[m] + 0.0113*ixy_3[m + 1]);
r = (((ix2_f[m - 1])*( iy2_f[m - 1]) - pow(ixy_f[m - 1],2)) - k*pow((ix2_f[m - 1] + iy2_f[m - 1]),2) );
if (r > thresh)
{
cimg_3[m] = r;
}
}
}
else if (i == rows + 2)
{
for (int j = 0; j <= cols - 1; j++)
{
cimg_1[j + 1] = cimg_2[j + 1]; cimg_2[j + 1] = cimg_3[j + 1];
cimg_3[j + 1] = 0;
}
}
//non maxima suppression of corner map
if (i > 2)
{
for (int m = 1; m <= cols; m++)
{
if (!((cimg_2[m] > cimg_2[m - 1]) && (cimg_2[m] > cimg_2[m + 1]) && (cimg_2[m] > cimg_1[m - 1]) && (cimg_2[m] > cimg_1[m]) && (cimg_2[m] > cimg_1[m + 1]) && (cimg_2[m] > cimg_3[m - 1]) && (cimg_2[m] > cimg_3[m]) && (cimg_2[m] > cimg_3[m + 1])))
{
output[m - 1] = 0;
}
else
{
output[m - 1] = 1;
}
}
ct = 0;
for (int p=1;p<=125;p++)
{
//packing output bytes to an int
res = (output[ct+3] << 24) | (output[ct+2] << 16) | (output[ct+1] << 8) | output[ct];
*out++ = res;
ct = ct+4;
}
}
}
}
[/code]
Again,please give me a hint about what to do about flow handling here.This might sound silly,but the reason I am asking for hints over and over is that the way I am doing this project is to learn things on the go as they come up.
Another thing is that according to your suggestion I looked at the output from top in a separate terminal while my main program was running and it showed that this process was taking up >= 95% of the CPU for the entire time.So, could the problem be due to this too?
What I am trying to do here is to identify the point (FPGA or Linux) where my problem lies.Once that is done I can take it up from there.
Thanks in advance.