How can I get the conditional branch delay slot filled (that is, have an instruction from before or after the branch moved into the slot) when using the mipsel-openwrt-linux-gcc cross compiler?
I use the following command to generate the MIPS assembly:
./mipsel-openwrt-linux-gcc -O2 -fno-delayed-branch -S ha.c;
However, I only get nop instructions after the bne instructions. The -O2 and -fno-delayed-branch options do not seem to be working.
Here is the content of ha.c:
/*
 * Three-way comparison of the two ints that x and y point to.
 *
 * Returns -1 when *x is smaller than *y, 1 when it is larger,
 * and 0 when both values are equal. Neither pointer is written to.
 */
int intcompare(int *x, int *y)
{
    const int lhs = *x;
    const int rhs = *y;

    /* Handle the "less than" case first; what remains is >=. */
    if (lhs < rhs) {
        return -1;
    }

    /* Of the remaining cases, "greater" yields 1 and "equal" yields 0. */
    return (lhs > rhs) ? 1 : 0;
}
/*
 * Single-step reduction of x by N.
 *
 * When x is at least N, returns x - N; otherwise returns x unchanged.
 * N is subtracted at most once, so this is not a full modulo operation.
 */
int mod1(int x, int N)
{
    return (x >= N) ? (x - N) : x;
}
/* Program entry point: performs no work and returns 0. */
int main()
{
return 0;
}
And here is the content of ha.s:
.file 1 "ha.c"
.section .mdebug.abi32
.previous
.gnu_attribute 4, 3
.abicalls
.option pic0
.text
.align 2
# intcompare: compiler output for intcompare(int *x, int *y) from ha.c.
# $4 and $5 are dereferenced as the two pointer arguments, the result is
# left in $2, and the function returns through $31.
.globl intcompare
.set nomips16
.ent intcompare
.type intcompare, @function
intcompare:
.frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, gp= 0
.mask 0x00000000,0
.fmask 0x00000000,0
# From here on the assembler is told not to reorder instructions or expand
# macros, so every delay-slot nop below was emitted by the compiler itself.
.set noreorder
.set nomacro
lw $2,0($4)       # $2 = *x
lw $3,0($5)       # $3 = *y
# NOTE(review): presumably a load-delay nop so that $3 is not read by the
# instruction immediately following its lw -- confirm for the target ISA.
nop
slt $4,$2,$3      # $4 = (*x < *y); the pointer in $4 is no longer needed
bne $4,$0,$L3     # *x < *y: go return -1
# Branch delay slot, left empty.
# NOTE(review): the command line shown for this listing passes
# -fno-delayed-branch, which per the GCC documentation turns delay-slot
# filling off -- likely why this is a nop.
nop
slt $2,$3,$2      # $2 = (*y < *x): 1 when *x > *y, 0 when equal
j $31             # return $2
nop               # jump delay slot, left empty
$L3:
li $2,-1 # 0xffffffffffffffff
j $31             # return -1
nop               # jump delay slot, left empty
.set macro
.set reorder
.end intcompare
.size intcompare, .-intcompare
# mod1: compiler output for mod1(int x, int N) from ha.c.
# x is read from $4 and N from $5; the result is left in $2 and the
# function returns through $31.
.align 2
.globl mod1
.set nomips16
.ent mod1
.type mod1, @function
mod1:
.frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, gp= 0
.mask 0x00000000,0
.fmask 0x00000000,0
# Instruction order below is fixed: no assembler reordering or macro expansion.
.set noreorder
.set nomacro
slt $3,$4,$5      # $3 = (x < N)
move $2,$4        # result defaults to x
bne $3,$0,$L6     # x < N: skip the subtraction and return x
# Branch delay slot, left empty.
# NOTE(review): the command line shown for this listing passes
# -fno-delayed-branch, which per the GCC documentation turns delay-slot
# filling off -- likely why this is a nop.
nop
subu $2,$4,$5     # x >= N: result = x - N
$L6:
j $31             # return $2
nop               # jump delay slot, left empty
.set macro
.set reorder
.end mod1
.size mod1, .-mod1
# main: compiler output for the empty main() from ha.c; it sets the result
# register $2 to zero and returns through $31. Unlike the two functions
# above, it is emitted into the .text.startup section.
.section .text.startup,"ax",@progbits
.align 2
.globl main
.set nomips16
.ent main
.type main, @function
main:
.frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, gp= 0
.mask 0x00000000,0
.fmask 0x00000000,0
# Instruction order below is fixed: no assembler reordering or macro expansion.
.set noreorder
.set nomacro
move $2,$0        # return value = 0
j $31             # return
nop               # jump delay slot, left empty
.set macro
.set reorder
.end main
.size main, .-main
.ident "GCC: (OpenWrt/Linaro GCC 4.8-2014.04 r44162) 4.8.3"
Looking at the compiler output, none of the preceding instructions can be moved into the branch delay slots, so the compiler has no choice but to fill the delay slots with `nop` instructions.
Here is an example that will use a branch delay slot (when compiled with -O or higher):
mipsel-linux-uclibc-objdump output:
What gcc optimization options were you using?