源碼是官方的2014.4 TRD工程里的,整個工程是基于zc702板子的,但手里只有塊小zybo >_< 里面的硬件設計很有參考價值,最近想用FPGA加速surf算法,先在這分析下TRD工程里sobel edge detection的例程。
wiki
Top Function
這里不同于xapp1167,直接調用hls::cv的庫函數,sobel邊緣提取算法是重新實現的,更方便了解hls的算法實現的特點。
void image_filter(AXI_STREAM& video_in, AXI_STREAM& video_out, int rows, int cols,
int C_XR0C0, int C_XR0C1, int C_XR0C2, int C_XR1C0, int C_XR1C1, int C_XR1C2, int C_XR2C0, int C_XR2C1, int C_XR2C2,
int C_YR0C0, int C_YR0C1, int C_YR0C2, int C_YR1C0, int C_YR1C1, int C_YR1C2, int C_YR2C0, int C_YR2C1, int C_YR2C2,
int c_high_thresh, int c_low_thresh, int c_invert)
{
//Create AXI streaming interfaces for the core
//這里定義axi-stream接口用于stream圖像數據
#pragma HLS INTERFACE axis port=video_in bundle=INPUT_STREAM
#pragma HLS INTERFACE axis port=video_out bundle=OUTPUT_STREAM
//設置rows、cols 為axilite總線上的寄存器,用于改變處理圖像的大小(圖像的最大尺寸為1920*1080)
#pragma HLS INTERFACE s_axilite port=rows bundle=CONTROL_BUS offset=0x14
#pragma HLS INTERFACE s_axilite port=cols bundle=CONTROL_BUS offset=0x1C
#pragma HLS INTERFACE s_axilite port=return bundle=CONTROL_BUS
//#pragma HLS INTERFACE ap_stable port=rows
//#pragma HLS INTERFACE ap_stable port=cols
//設置sobel算子x、y方向的濾波模板 方便PS端改變模板(比如可以改成Prewitt算子)
#pragma HLS INTERFACE s_axilite port= C_XR0C0 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_XR0C1 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_XR0C2 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_XR1C0 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_XR1C1 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_XR1C2 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_XR2C0 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_XR2C1 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_XR2C2 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR0C0 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR0C1 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR0C2 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR1C0 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR1C1 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR1C2 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR2C0 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR2C1 bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= C_YR2C2 bundle=CONTROL_BUS
//x、y閾值
#pragma HLS INTERFACE s_axilite port= c_high_thresh bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= c_low_thresh bundle=CONTROL_BUS
#pragma HLS INTERFACE s_axilite port= c_invert bundle=CONTROL_BUS
YUV_IMAGE img_0(rows, cols);
YUV_IMAGE img_1(rows, cols);
#pragma HLS dataflow
//將axi-stream 轉換為 hls::mat (hls::mat是數據流的形式)
hls::AXIvideo2Mat(video_in, img_0);
//sobel edge detection implement
sobel_filter_core(img_0, img_1, rows, cols,
C_XR0C0, C_XR0C1, C_XR0C2, C_XR1C0, C_XR1C1, C_XR1C2, C_XR2C0, C_XR2C1, C_XR2C2,
C_YR0C0, C_YR0C1, C_YR0C2, C_YR1C0, C_YR1C1, C_YR1C2, C_YR2C0, C_YR2C1, C_YR2C2,
c_high_thresh, c_low_thresh, c_invert);
//hls::mat 轉換為axi-stream輸出
hls::Mat2AXIvideo(img_1, video_out);
}
top function 是一個標準的hls 圖像處理結構,具體內容請參看xapp1167文檔
sobel_filter_core
void sobel_filter_core(YUV_IMAGE& src, YUV_IMAGE& dst, int rows, int cols,
int C_XR0C0, int C_XR0C1, int C_XR0C2, int C_XR1C0, int C_XR1C1, int C_XR1C2, int C_XR2C0, int C_XR2C1, int C_XR2C2,
int C_YR0C0, int C_YR0C1, int C_YR0C2, int C_YR1C0, int C_YR1C1, int C_YR1C2, int C_YR2C0, int C_YR2C1, int C_YR2C2,
int c_high_thresh, int c_low_thresh, int c_invert)
{
Y_BUFFER buff_A;
Y_WINDOW buff_C;
//Y_BUFFER Y_WINDOW 定義如下
//typedef hls::Window<3, 3, unsigned char>
Y_WINDOW;
//typedef hls::LineBuffer<3, MAX_WIDTH, unsigned char> Y_BUFFER;
//hls特有的memory結構 具體特征說明見下方
for(int row = 0; row < rows+1; row++){
for(int col = 0; col < cols+1; col++){
#pragma HLS loop_flatten off
// loop_flatten 選項說明
//Allows nested loops to be collapsed into a single loop with improved latency.
//
#pragma HLS dependence variable=&buff_A false
// dependence 選項說明
//Used to provide additional information that can overcome loop-carry dependencies and allow loops to be pipelined (or pipelined with lower intervals).
#pragma HLS PIPELINE II = 1
// PIPELINE 選項說明
//Reduces the initiation interval by allowing the concurrent execution of operations within a loop or function.
//流水的迭代次數為1
// Temp values are used to reduce the number of memory reads
unsigned char temp;
YUV_PIXEL tempx;
評論
查看更多