I want to use dsp resource as much as possible, I wrote such code, and the compiler followed my pragmas:
#include "ap_fixed.h"
#include "hls_stream.h"
#include "hls_half.h"
#include "hls_math.h"
#include "ap_int.h"
#include "hls_vector.h"
#include <cmath>
using namespace std;
const int EMBEDDING_SIZE = 64;
typedef hls::vector<signed char, EMBEDDING_SIZE> token_t;
struct half_2{
half x;
half y;
};
void layernorm (hls::stream<token_t> & in, hls::stream<half_2> & out) {
auto token_packet = in.read();
half mean_sum = 0.f, var_sum = 0.f;
for (int i_pack = 0; i_pack < EMBEDDING_SIZE; i_pack++){
#pragma HLS PIPELINE II=2
half ele = token_packet[i_pack];
#pragma HLS BIND_OP variable=mean_sum op=hadd impl=dsp
mean_sum += ele;
half ele_square;
#pragma HLS BIND_OP variable=ele_square op=hmul impl=dsp
ele_square = ele * ele;
#pragma HLS BIND_OP variable=var_sum op=hadd impl=dsp
var_sum += ele_square;
}
half_2 res = {mean_sum, var_sum};
out.write(res);
}
the C synthesis result is shown as below:
But if I add some code below the original code and run C synthesis, the orginal code no longer uses fabrics instead, and only the code I added use dsp, so what is the problem and how to fix it? Thanks!
void layernorm (hls::stream<token_t> & in, hls::stream<half_2> & out)
{
auto token_packet = in.read();
half mean_sum = 0.f, var_sum = 0.f;
for (int i_pack = 0; i_pack < EMBEDDING_SIZE; i_pack++){
#pragma HLS PIPELINE II=2
half ele = token_packet[i_pack];
#pragma HLS BIND_OP variable=mean_sum op=hadd impl=dsp
mean_sum += ele;
half ele_square;
#pragma HLS BIND_OP variable=ele_square op=hmul impl=dsp
ele_square = ele * ele;
#pragma HLS BIND_OP variable=var_sum op=hadd impl=dsp
var_sum += ele_square;
}
half mean, var, mean_square, sub_mean_square, sqrt_var, final_val, EPS=1e-5;
#pragma HLS BIND_OP variable=mean op=hmul impl=dsp
#pragma HLS BIND_OP variable=var op=hmul impl=dsp
#pragma HLS BIND_OP variable=mean_square op=hmul impl=dsp
#pragma HLS BIND_OP variable=sub_mean_square op=hsub impl=dsp
#pragma HLS BIND_OP variable=sqrt_var op=hsqrt impl=fabric
#pragma HLS BIND_OP variable=final_val op=hadd impl=dsp
mean = mean_sum / EMBEDDING_SIZE;
var = var_sum / EMBEDDING_SIZE;
mean_square = mean * mean;
sub_mean_square = var - mean_square;
sqrt_var = hls::half_rsqrt(sub_mean_square);
final_val = sqrt_var + EPS;
half_2 res = {mean, final_val};
out.write(res);
}
By the way, the vitis version is 2023.1 and the board is xcvu19p