Non Restoring Division in Iverilog

78 views Asked by At

I am trying to implement the non-restoring division algorithm in iVerilog. I am able to get the desired final outputs but the intermediate outputs are wrong. What I mean by intermediate outputs are the values stored in the accumulator (A) and dividend(Q) bits after each clock cycle. This is the Algorithm

This is the code for the circuit

module left_shift(input wire clk,reset,input wire[16:0]A,input wire[15:0]Q,output wire[16:0]A_ls,output wire[15:0]Q_ls);

dfr dfrA_ls0(clk,reset,A[15],A_ls[16]);
dfr dfrA_ls1(clk,reset,A[14],A_ls[15]);
dfr dfrA_ls2(clk,reset,A[13],A_ls[14]);
dfr dfrA_ls3(clk,reset,A[12],A_ls[13]);
dfr dfrA_ls4(clk,reset,A[11],A_ls[12]);
dfr dfrA_ls5(clk,reset,A[10],A_ls[11]);
dfr dfrA_ls6(clk,reset,A[9],A_ls[10]);
dfr dfrA_ls7(clk,reset,A[8],A_ls[9]);
dfr dfrA_ls8(clk,reset,A[7],A_ls[8]);
dfr dfrA_ls9(clk,reset,A[6],A_ls[7]);
dfr dfrA_ls10(clk,reset,A[5],A_ls[6]);
dfr dfrA_ls11(clk,reset,A[4],A_ls[5]);
dfr dfrA_ls12(clk,reset,A[3],A_ls[4]);
dfr dfrA_ls13(clk,reset,A[2],A_ls[3]);
dfr dfrA_ls14(clk,reset,A[1],A_ls[2]);
dfr dfrA_ls15(clk,reset,A[0],A_ls[1]);
dfr dfrA_ls16(clk,reset,Q[15],A_ls[0]);

dfr dfrQ_ls0(clk,reset,Q[14],Q_ls[15]);
dfr dfrQ_ls1(clk,reset,Q[13],Q_ls[14]);
dfr dfrQ_ls2(clk,reset,Q[12],Q_ls[13]);
dfr dfrQ_ls3(clk,reset,Q[11],Q_ls[12]);
dfr dfrQ_ls4(clk,reset,Q[10],Q_ls[11]);
dfr dfrQ_ls5(clk,reset,Q[9],Q_ls[10]);
dfr dfrQ_ls6(clk,reset,Q[8],Q_ls[9]);
dfr dfrQ_ls7(clk,reset,Q[7],Q_ls[8]);
dfr dfrQ_ls8(clk,reset,Q[6],Q_ls[7]);
dfr dfrQ_ls9(clk,reset,Q[5],Q_ls[6]);
dfr dfrQ_ls10(clk,reset,Q[4],Q_ls[5]);
dfr dfrQ_ls11(clk,reset,Q[3],Q_ls[4]);
dfr dfrQ_ls12(clk,reset,Q[2],Q_ls[3]);
dfr dfrQ_ls13(clk,reset,Q[1],Q_ls[2]);
dfr dfrQ_ls14(clk,reset,Q[0],Q_ls[1]);

endmodule

module cycle_div(input wire clk,reset,input wire[16:0]A,M,input wire[15:0]Q,output wire[15:0]quo,output wire[16:0]rem);

    wire A_MSB;
    assign A_MSB=A[16];
    wire A_Msb_Op;
    invert inv_MSB(A_MSB,A_Msb_Op);

    wire [16:0]B;

    xor2 xor_0(M[0],A_Msb_Op,B[0]);
    xor2 xor_1(M[1],A_Msb_Op,B[1]);
    xor2 xor_2(M[2],A_Msb_Op,B[2]);
    xor2 xor_3(M[3],A_Msb_Op,B[3]);
    xor2 xor_4(M[4],A_Msb_Op,B[4]);
    xor2 xor_5(M[5],A_Msb_Op,B[5]);
    xor2 xor_6(M[6],A_Msb_Op,B[6]);
    xor2 xor_7(M[7],A_Msb_Op,B[7]);
    xor2 xor_8(M[8],A_Msb_Op,B[8]);
    xor2 xor_9(M[9],A_Msb_Op,B[9]);
    xor2 xor_10(M[10],A_Msb_Op,B[10]);
    xor2 xor_11(M[11],A_Msb_Op,B[11]);
    xor2 xor_12(M[12],A_Msb_Op,B[12]);
    xor2 xor_13(M[13],A_Msb_Op,B[13]);
    xor2 xor_14(M[14],A_Msb_Op,B[14]);
    xor2 xor_15(M[15],A_Msb_Op,B[15]);
    xor2 xor_16(M[16],A_Msb_Op,B[16]);

    wire[16:0]A_ls;

    left_shift ls(clk,reset,A,Q,A_ls,quo);

    wire[16:0]carry;

    fa fa_0(A_ls[0],B[0],A_Msb_Op,rem[0],carry[0]);
    fa fa_1(A_ls[1],B[1],carry[0],rem[1],carry[1]);
    fa fa_2(A_ls[2],B[2],carry[1],rem[2],carry[2]);
    fa fa_3(A_ls[3],B[3],carry[2],rem[3],carry[3]);
    fa fa_4(A_ls[4],B[4],carry[3],rem[4],carry[4]);
    fa fa_5(A_ls[5],B[5],carry[4],rem[5],carry[5]);
    fa fa_6(A_ls[6],B[6],carry[5],rem[6],carry[6]);
    fa fa_7(A_ls[7],B[7],carry[6],rem[7],carry[7]);
    fa fa_8(A_ls[8],B[8],carry[7],rem[8],carry[8]);
    fa fa_9(A_ls[9],B[9],carry[8],rem[9],carry[9]);
    fa fa_10(A_ls[10],B[10],carry[9],rem[10],carry[10]);
    fa fa_11(A_ls[11],B[11],carry[10],rem[11],carry[11]);
    fa fa_12(A_ls[12],B[12],carry[11],rem[12],carry[12]);
    fa fa_13(A_ls[13],B[13],carry[12],rem[13],carry[13]);
    fa fa_14(A_ls[14],B[14],carry[13],rem[14],carry[14]);
    fa fa_15(A_ls[15],B[15],carry[14],rem[15],carry[15]);
    fa fa_16(A_ls[16],B[16],carry[15],rem[16],carry[16]);


    wire x=1'b1;
    xor2 xor_LSB(x,rem[16],quo[0]);

endmodule

module division(input wire clk,reset,input wire[15:0]dividend,divisor,output wire[15:0]quotient,output wire[16:0]remainder);


wire [16:0]A;
wire x=1'b1;
wire y=1'b0;

dfr dfrA_0(clk,reset,y,A[0]);
dfr dfrA_1(clk,reset,y,A[1]);
dfr dfrA_2(clk,reset,y,A[2]);
dfr dfrA_3(clk,reset,y,A[3]);
dfr dfrA_4(clk,reset,y,A[4]);
dfr dfrA_5(clk,reset,y,A[5]);
dfr dfrA_6(clk,reset,y,A[6]);
dfr dfrA_7(clk,reset,y,A[7]);
dfr dfrA_8(clk,reset,y,A[8]);
dfr dfrA_9(clk,reset,y,A[9]);
dfr dfrA_10(clk,reset,y,A[10]);
dfr dfrA_11(clk,reset,y,A[11]);
dfr dfrA_12(clk,reset,y,A[12]);
dfr dfrA_13(clk,reset,y,A[13]);
dfr dfrA_14(clk,reset,y,A[14]);
dfr dfrA_15(clk,reset,y,A[15]);
dfr dfrA_16(clk,reset,y,A[16]);


wire [16:0]A_temp1;
wire [16:0]A_temp2;
wire [16:0]A_temp3;
wire [16:0]A_temp4;
wire [16:0]A_temp5;
wire [16:0]A_temp6;
wire [16:0]A_temp7;
wire [16:0]A_temp8;
wire [16:0]A_temp9;
wire [16:0]A_temp10;
wire [16:0]A_temp11;
wire [16:0]A_temp12;
wire [16:0]A_temp13;
wire [16:0]A_temp14;
wire [16:0]A_temp15;
wire [16:0]A_temp16;


wire [16:0]M;

assign M=divisor;

wire [15:0]Q;
wire [15:0]Q_temp[0:15];
assign Q=dividend;

cycle_div div_0(clk,reset,A,M,Q,Q_temp[0],A_temp1);
cycle_div div_1(clk,reset,A_temp1,M,Q_temp[0],Q_temp[1],A_temp2);
cycle_div div_2(clk,reset,A_temp2,M,Q_temp[1],Q_temp[2],A_temp3);
cycle_div div_3(clk,reset,A_temp3,M,Q_temp[2],Q_temp[3],A_temp4);
cycle_div div_4(clk,reset,A_temp4,M,Q_temp[3],Q_temp[4],A_temp5);
cycle_div div_5(clk,reset,A_temp5,M,Q_temp[4],Q_temp[5],A_temp6);
cycle_div div_6(clk,reset,A_temp6,M,Q_temp[5],Q_temp[6],A_temp7);
cycle_div div_7(clk,reset,A_temp7,M,Q_temp[6],Q_temp[7],A_temp8);
cycle_div div_8(clk,reset,A_temp8,M,Q_temp[7],Q_temp[8],A_temp9);
cycle_div div_9(clk,reset,A_temp9,M,Q_temp[8],Q_temp[9],A_temp10);
cycle_div div_10(clk,reset,A_temp10,M,Q_temp[9],Q_temp[10],A_temp11);
cycle_div div_11(clk,reset,A_temp11,M,Q_temp[10],Q_temp[11],A_temp12);
cycle_div div_12(clk,reset,A_temp12,M,Q_temp[11],Q_temp[12],A_temp13);
cycle_div div_13(clk,reset,A_temp13,M,Q_temp[12],Q_temp[13],A_temp14);
cycle_div div_14(clk,reset,A_temp14,M,Q_temp[13],Q_temp[14],A_temp15);
cycle_div div_15(clk,reset,A_temp15,M,Q_temp[14],quotient,A_temp16);

wire [16:0]B;

mux2 mux_0(y,M[0],A_temp16[16],B[0]);
mux2 mux_1(y,M[1],A_temp16[16],B[1]);
mux2 mux_2(y,M[2],A_temp16[16],B[2]);
mux2 mux_3(y,M[3],A_temp16[16],B[3]);
mux2 mux_4(y,M[4],A_temp16[16],B[4]);
mux2 mux_5(y,M[5],A_temp16[16],B[5]);
mux2 mux_6(y,M[6],A_temp16[16],B[6]);
mux2 mux_7(y,M[7],A_temp16[16],B[7]);
mux2 mux_8(y,M[8],A_temp16[16],B[8]);
mux2 mux_9(y,M[9],A_temp16[16],B[9]);
mux2 mux_10(y,M[10],A_temp16[16],B[10]);
mux2 mux_11(y,M[11],A_temp16[16],B[11]);
mux2 mux_12(y,M[12],A_temp16[16],B[12]);
mux2 mux_13(y,M[13],A_temp16[16],B[13]);
mux2 mux_14(y,M[14],A_temp16[16],B[14]);
mux2 mux_15(y,M[15],A_temp16[16],B[15]);
mux2 mux_16(y,M[16],A_temp16[16],B[16]);

wire[16:0]carry;

fa fa_f0(A_temp16[0],B[0],y,remainder[0],carry[0]);
fa fa_f1(A_temp16[1],B[1],carry[0],remainder[1],carry[1]);
fa fa_f2(A_temp16[2],B[2],carry[1],remainder[2],carry[2]);
fa fa_f3(A_temp16[3],B[3],carry[2],remainder[3],carry[3]);
fa fa_f4(A_temp16[4],B[4],carry[3],remainder[4],carry[4]);
fa fa_f5(A_temp16[5],B[5],carry[4],remainder[5],carry[5]);
fa fa_f6(A_temp16[6],B[6],carry[5],remainder[6],carry[6]);
fa fa_f7(A_temp16[7],B[7],carry[6],remainder[7],carry[7]);
fa fa_f8(A_temp16[8],B[8],carry[7],remainder[8],carry[8]);
fa fa_f9(A_temp16[9],B[9],carry[8],remainder[9],carry[9]);
fa fa_f10(A_temp16[10],B[10],carry[9],remainder[10],carry[10]);
fa fa_f11(A_temp16[11],B[11],carry[10],remainder[11],carry[11]);
fa fa_f12(A_temp16[12],B[12],carry[11],remainder[12],carry[12]);
fa fa_f13(A_temp16[13],B[13],carry[12],remainder[13],carry[13]);
fa fa_f14(A_temp16[14],B[14],carry[13],remainder[14],carry[14]);
fa fa_f15(A_temp16[15],B[15],carry[14],remainder[15],carry[15]);
fa fa_f16(A_temp16[16],B[16],carry[15],remainder[16],carry[16]);
endmodule

Test-bench :

`timescale 1ns/100ps
`define TESTVECS 8

module tb;
    reg clk,reset;
    initial begin
        $dumpfile("tb_nr_div.vcd");
        $dumpvars(0,tb);
    end
    initial begin #5 reset = 1'b1; #10 reset = 1'b0;end
    initial clk = 1'b0; always #5 clk =~ clk;
    integer i;
    reg[15:0]dividend;
    reg[15:0]divisor;
    wire[15:0]quotient;
    wire[16:0]remainder;
    reg[31:0]test_vecs[0:(`TESTVECS-1)];
    initial begin
        test_vecs[0][31:16]=16'h15;test_vecs[0][15:0]=16'h4; 
        test_vecs[1][31:16]=16'h7;test_vecs[1][15:0]=16'h3;
        test_vecs[2][31:16]=16'hFFFF;test_vecs[2][15:0]=16'h9A9A;
        test_vecs[3][31:16]=16'hABCD;test_vecs[3][15:0]=16'h1111;
        test_vecs[4][31:16]=16'h1234;test_vecs[4][15:0]=16'h4321; //Divisor > Dividend
        test_vecs[5][31:16]=16'hFFF1;test_vecs[5][15:0]=16'h2755; //Prime divided by prime
        test_vecs[6][31:16]=16'hFFFF;test_vecs[6][15:0]=16'hFFFF; //2^16-1 divided by 2^16-1
        test_vecs[7][31:16]=16'hFFFF;test_vecs[7][15:0]=16'h0001; //2^16-1 divided by 1
    end
    division division_0(clk,reset,dividend,divisor,quotient,remainder);
    initial begin
        #5 for(i=0;i<`TESTVECS;i=i+1)begin
            reset=1'b1;
            #10 reset=1'b0;
            #170 {dividend,divisor}=test_vecs[i];
        end
    #200 $finish;
    end
endmodule

Library file :

module xor2(input wire i0,i1,output wire o);
assign o = i0^i1;
endmodule

module and2(input wire i0,i1, output wire o);
assign o = i0&i1;
endmodule

module or2(input wire i0,i1,output wire o);
assign o=i0|i1;
endmodule

module invert(input wire a,output wire b);
assign b=!a;
endmodule

module df (input wire clk, in, output wire out);
  reg df_out;
  always@(posedge clk) df_out <= in;
  assign out = df_out;
endmodule

module dfr (input wire clk, reset, in, output wire out);
  wire reset_, df_in;
  invert invert_0 (reset, reset_);
  and2 and2_0 (in, reset_, df_in);
  df df_0 (clk, df_in, out);
endmodule

module fa(input wire a,b,cin,output wire sum,cout);
assign sum=a^b^cin;
assign cout= a&b | a&cin | b&cin;
endmodule

module mux2(input wire i0,i1,sel, output wire o);
assign o= (sel==0)?i0:i1;
endmodule

This is the GtkWave GtkWave for the first test-vector. This is GtkWave for the second test-vector The first 7 rows are the outputs and inputs required.

The next few rows show the intermediate values that need to be stored in the accumulator. However as you can see , the accumulator A_tempX is storing some values before Clock Cycle X is executed. Please help me figure out where I am going wrong.

0

There are 0 answers