I'm making an assembly language about Haar DWT and as you know, haar dwt needs a matrix represented from an image. I want an assembly language that able to put 64x64 or 128x128 or even 256x256 matrix. How do I do that?
I am new to this assembly thing, let alone to use DLX microprocessor. I have the assembly code for the calculation of the DWT itself. But somehow the biggest matrix I can put in haar_in array is 4x4 matrix/16 values only. I don't know if it's the memory thing, or the fact that I calculate big floating point that is too heavy for the program, but I can't get it done. I have tried to change the space of haar_out, the M and N, but it didn't work too.
.data
sqrt:
.double 1.4142
haar_in:
.double 148, 142, 130, 168, 166, 117, 138, 120, 116, 118, 109, 136, 99, 89, 123, 120
haar_out:
.space 10000
M:
.word 4
N:
.word 4
K:
.word 1
; Register Usage
; GP:
; R0 => 0
; R1 => M
; R2 => N
; R3.. => Internally used
; FP:
; F0 => 0
; F8 => Square Root of 2
; F2.. => Internally used
main:
ld f8, sqrt(r0) ; F8 contains square root of 2
lw r1, M(r0) ; Get value of M in R1
lw r2, N(r0) ; Get value of N in R2
addi r3, r0, 0 ; Set R3 to 0 as a counter for M
addi r4, r0, 0 ; Set R4 to 0 as a counter for N
; Copy the haar_in to the haar_out array
copy_n_haar_out:
sub r2, r2, r4
beqz r2, n_loop_exit ; Exit N-loop if counter reached N
lw r2, N(r0) ; Get value of N in R2
slli r4, r4, 3 ; Align to the next pointer
lw r1, M(r0) ; Get value of M in R1
addi r3, r0, 0 ; Set R3 to 0 as a counter for M
copy_m_haar_out:
sub r1, r1, r3
beqz r1, m_loop_exit ; Exit M-loop if counter reached M
lw r1, M(r0) ; Get value of M in R1
slli r3, r3, 3 ; Align to the next pointer
mult r5, r4, r1 ; Form R5 = (j*m)
add r5, r5, r3 ; R5 = i + j*m
ld f2, haar_in(r5) ; Get the array from haar array
sd haar_out(r5), f2 ; Store it in internal array
srli r3, r3, 3 ; Get the original count back
addi r3, r3, 1 ; Increment the count by 1
j copy_m_haar_out ; Loop back
m_loop_exit:
srli r4, r4, 3 ; Get the original count back
addi r4, r4, 1 ; Increment the count by 1
j copy_n_haar_out ; Loop back
n_loop_exit:
lw r1, M(r0) ; Get value of M in R1
lw r2, N(r0) ; Get value of N in R2
; Determine K, the largest power of 2 such that K <= M
lw r3, K(r0) ; Get the initial value of K in R3
k_less_than_m:
slli r5, r3, 1 ; R5 = K*2
slt r4, r1, r5 ; Set R4 if M <= K
subi r4, r4, 1 ; Check if R4 is set
beqz r4, k_loop_m_exit ; Exit loop if set
sub r4, r1, r3 ; Check if K == M
beqz r4, k_loop_m_exit ; Exit loop if equal
slli r3, r3, 1 ; K = K*2
j k_less_than_m ; Loop back
k_loop_m_exit:
sw K(r0), r3 ; Store the value in K
; Transform all the columns now
lw r6, K(r0) ; Get the updated value of K in R6
addi r3, r0, 0 ; Set R3 to 0 as a counter for K
addi r4, r0, 0 ; Set R4 to 0 as a counter for N
col_transform:
slti r7, r6, 1 ; Set R7 if K < 1
bnez r7, col_trans_exit ; Exit Loop if R7 is set
srli r6, r6, 1 ; K = K/2
sw K(r0), r6 ; Store value of K
; Perform the actual transform
col_trans_n:
sub r2, r2, r4
beqz r2, col_trans_n_exit ; Exit N-loop if counter reached N
lw r2, N(r0) ; Get value of N in R2
slli r4, r4, 3 ; Align to the next pointer
lw r6, K(r0) ; Get value of K in R6
addi r3, r0, 0 ; Set R3 to 0 as a counter for M
col_trans_k:
sub r6, r6, r3
beqz r6, col_trans_k_exit ; Exit K-loop if counter reached K
lw r6, K(r0) ; Get value of K in R6
slli r3, r3, 3 ; Align to the next pointer
slli r5, r3, 1 ; Form R5 = (2*i)
mult r8, r4, r1 ; Form R8 = (j*m)
add r9, r5, r8 ; R9 = (2*i) + (j*m)
ld f2, haar_in(r9) ; Get the value from haar array in F2
addi r9, r9, 8 ; Move to the next index
ld f6, haar_in(r9) ; Get the next value from array in F6
addd f4, f2, f6 ; Add the results in F4
divd f4, f4, f8 ; F4 = F4/sqrt(2)
subd f10, f2, f6 ; Sub the results in F10
divd f10, f10, f8 ; F10 = F10/sqrt(2)
add r5, r3, r8 ; Form R5 = i + j*m
sd haar_out(r5), f4 ; Store the result in out array
slli r6, r6, 3 ; Form the array index in K
add r5, r5, r6 ; Form R5 = (k+i+j*m)
sd haar_out(r5), f10 ; Store it in internal array
srli r6, r6, 3 ; Get the original value back
srli r3, r3, 3 ; Get the original count back
addi r3, r3, 1 ; Increment the count by 1
j col_trans_k ; Loop back
col_trans_k_exit:
srli r4, r4, 3 ; Get the original count back
addi r4, r4, 1 ; Increment the count by 1
j col_trans_n ; Loop back
col_trans_n_exit:
j col_transform
col_trans_exit:
This is my .data, so whenever I'm calculating a number, I will load dp from haar_in to an FP register, then the result of the calculation will be stored in haar_out. Whenever I put more that 4x4 matrix, the message will be a timeout, an overflow, or illegal number. Help..