I started taking an online course on software security. In one of the sections, I was instructed to execute a hidden C function using a buffer overflow. I got to thinking: what would happen if I could pass machine instructions directly to a stack-unsafe executable?
What I have been trying, or where I am at now:
(Simple routine or function compiled or assembled into an object,
then printed to screen with `objdump`)
>>> x = "984579273698529424576299" # open("file.o").read()
>>> for i in range(0, len(x), 2):
... print "\\x" + x[i:i+2],
...
\x98 \x45 \x79 \x27 \x36 \x98 \x52 \x94 \x24 \x57 \x62 \x99
/* Harness template: the placeholder inside the braces is to be replaced with
 * the byte list produced from the objdump output; the bytes are then run. */
int main(void) {
unsigned char shellcode[] = { <formatted shellcode bytes from objdump> };
/* Reinterpret the address of the byte array as a pointer to a function
 * taking no arguments and returning nothing. */
void (*fn)(void) = (void (*)(void))shellcode;
/* Jump to the first byte of the array.
 * NOTE(review): shellcode is a local array, so this presumably only works
 * when the memory holding it is executable -- confirm for the target. */
fn();
return 0;
}
An actual example of some sequences I've tried:
hello.c
#include <stdlib.h> /* exit */
#include <unistd.h> /* write */

/*
 * Write "Hello\n" to standard output and terminate with status 0.
 *
 * The message is built as a local char array, so the compiler has to
 * materialise it on the stack at run time.
 */
int main(void)
{
    char buf[] = {'H', 'e', 'l', 'l', 'o', '\n', '\0'};

    /* sizeof(buf) is 7: the trailing '\0' is written out as well. */
    write(1, buf, sizeof(buf));
    exit(0);
}
shellforge2.py (a work in progress)
"""Compile a C file to 32-bit x86 and print its .text bytes as a C array.

Usage: python shellforge2.py source.c

The script compiles the source to assembly, echoes that assembly, assembles
it into an object file, reads the .text section back with ``objdump -s`` and
prints the bytes both as a Python list and as an ``unsigned char`` array
initialiser.
"""
import os
import re
import subprocess
import sys

# Assembly lines containing any of these directives are not echoed.
IGNORED_DIRECTIVES = (".file", ".def")

# A data row of `objdump -s`: one space, the hex offset, one space.  The
# offset is at least four digits wide and grows for larger sections.
DUMP_ROW = re.compile(r"^ [0-9a-f]{4,} ")

# Width of the hex column that follows the offset: four groups of eight hex
# digits, each followed by one space.  Short rows are space padded, so the
# ASCII column always starts after this many characters.
HEX_COLUMN_WIDTH = 36


def output_names(src):
    """Return ``(assembly_path, object_path)`` derived from *src*.

    Only the final extension is replaced, so dots in directory names (for
    example ``./hello.c``) and sources without an extension are handled.
    """
    stem = os.path.splitext(src)[0]
    return stem + ".s", stem + ".o"


def text_section_hex(dump_lines):
    """Return the dumped bytes as a list of two-character hex strings.

    *dump_lines* is the output of ``objdump -s`` split into lines.  Header
    lines are skipped, and the ASCII column of each data row is ignored by
    cutting the row at the fixed width of the hex column.
    """
    chunks = []
    for line in dump_lines:
        row = DUMP_ROW.match(line)
        if row is None:
            continue
        column = line[row.end():row.end() + HEX_COLUMN_WIDTH]
        chunks.append("".join(column.split()))
    stream = "".join(chunks)
    return [stream[i:i + 2] for i in range(0, len(stream), 2)]


def c_array_literal(hex_bytes):
    """Format *hex_bytes* as the ``unsigned char shellcode[]`` declaration."""
    body = ", ".join("'\\x%s'" % byte for byte in hex_bytes)
    return "unsigned char shellcode[] = {" + body + " };"


def main(argv):
    """Run the compile / assemble / dump pipeline; return an exit status."""
    if len(argv) != 2:
        sys.stderr.write("usage: %s source.c\n" % argv[0])
        return 2

    src = argv[1]
    asmsrc, binobj = output_names(src)

    # Argument lists (no shell) keep file names with spaces or shell
    # metacharacters from being reinterpreted.
    compile_cmd = [
        "gcc", "-march=i386", "-O3", "-S", "-fPIC", "-Winline",
        "-finline-functions", "-ffreestanding",
        "-o", asmsrc, "-m32", src,
    ]
    # The assembly is 32-bit, so it must also be assembled with -m32.
    assemble_cmd = ["gcc", "-m32", "-c", "-o", binobj, asmsrc]
    dump_cmd = ["objdump", "-j", ".text", "-s", "-z", binobj]

    print(" ".join(compile_cmd))
    print("")

    try:
        subprocess.check_call(compile_cmd)

        with open(asmsrc) as asm:
            for line in asm:
                if not any(word in line for word in IGNORED_DIRECTIVES):
                    print(line.rstrip("\n"))

        subprocess.check_call(assemble_cmd)
        dump = subprocess.check_output(dump_cmd, universal_newlines=True)
    except subprocess.CalledProcessError as err:
        sys.stderr.write("command failed with status %d: %s\n"
                         % (err.returncode, " ".join(err.cmd)))
        return err.returncode or 1
    except OSError as err:
        # Raised when gcc or objdump cannot be started at all.
        sys.stderr.write("cannot run toolchain: %s\n" % err)
        return 1

    hex_bytes = text_section_hex(dump.splitlines())
    if not hex_bytes:
        sys.stderr.write("no .text bytes found in %s\n" % binobj)
        return 1

    print(hex_bytes)
    print(c_array_literal(hex_bytes))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
shell command and output:
$ python shellforge2.py hello.c
gcc -march=i386 -O3 -S -fPIC -Winline -finline-functions -ffreestanding -o hello.s -m32 hello.c
.text
.p2align 2,,3
.globl main
.type main, @function
main:
leal 4(%esp), %ecx
andl $-16, %esp
pushl -4(%ecx)
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
pushl %ebx
pushl %ecx
subl $28, %esp
call .L3
.L3:
popl %ebx
addl $_GLOBAL_OFFSET_TABLE_+[.-.L3], %ebx
leal -23(%ebp), %edi
leal C.0.751@GOTOFF(%ebx), %esi
movl $7, %ecx
rep movsb
pushl $7
leal -23(%ebp), %eax
pushl %eax
pushl $1
call write@PLT
movl $0, (%esp)
call exit@PLT
addl $16, %esp
leal -16(%ebp), %esp
popl %ecx
popl %ebx
popl %esi
popl %edi
leave
leal -4(%ecx), %esp
ret
.size main, .-main
.section .rodata
.type C.0.751, @object
.size C.0.751, 7
C.0.751:
.byte 72
.byte 101
.byte 108
.byte 108
.byte 111
.byte 10
.byte 0
.ident "GCC: (Ubuntu 4.3.3-5ubuntu4) 4.3.3"
.section .note.GNU-stack,"",@progbits
['8d', '4c', '24', '04', '83', 'e4', 'f0', 'ff', '71', 'fc', '55', '89', 'e5', '57', '56', '53', '51', '83', 'ec', '1c', 'e8', '00', '00', '00', '00', '5b', '81', 'c3', '03', '00', '00', '00', '8d', '7d', 'e9', '8d', 'b3', '00', '00', '00', '00', 'b9', '07', '00', '00', '00', 'f3', 'a4', '6a', '07', '8d', '45', 'e9', '50', '6a', '01', 'e8', 'fc', 'ff', 'ff', 'ff', 'c7', '04', '24', '00', '00', '00', '00', 'e8', 'fc', 'ff', 'ff', 'ff', '83', 'c4', '10', '8d', '65', 'f0', '59', '5b', '5e', '5f', 'c9', '8d', '61', 'fc', 'c3']
unsigned char shellcode[] = {'\x8d', '\x4c', '\x24', '\x04', '\x83', '\xe4', '\xf0', '\xff', '\x71', '\xfc', '\x55', '\x89', '\xe5', '\x57', '\x56', '\x53', '\x51', '\x83', '\xec', '\x1c', '\xe8', '\x00', '\x00', '\x00', '\x00', '\x5b', '\x81', '\xc3', '\x03', '\x00', '\x00', '\x00', '\x8d', '\x7d', '\xe9', '\x8d', '\xb3', '\x00', '\x00', '\x00', '\x00', '\xb9', '\x07', '\x00', '\x00', '\x00', '\xf3', '\xa4', '\x6a', '\x07', '\x8d', '\x45', '\xe9', '\x50', '\x6a', '\x01', '\xe8', '\xfc', '\xff', '\xff', '\xff', '\xc7', '\x04', '\x24', '\x00', '\x00', '\x00', '\x00', '\xe8', '\xfc', '\xff', '\xff', '\xff', '\x83', '\xc4', '\x10', '\x8d', '\x65', '\xf0', '\x59', '\x5b', '\x5e', '\x5f', '\xc9', '\x8d', '\x61', '\xfc', '\xc3' };
testshell.c
/* Test harness: holds the bytes printed by shellforge2.py for hello.c in a
 * local array and calls them as if they were a function. */
int main(void) {
/* Byte-for-byte copy of the .text dump of hello.o.
 * NOTE(review): both five-byte sequences e8 fc ff ff ff line up with the
 * `call write@PLT` and `call exit@PLT` lines of hello.s; their operand is
 * still the unrelocated placeholder -4, not a real target address. */
unsigned char shellcode[] = {'\x8d', '\x4c', '\x24', '\x04', '\x83', '\xe4', '\xf0', '\xff', '\x71', '\xfc', '\x55', '\x89', '\xe5', '\x57', '\x56', '\x53', '\x51', '\x83', '\xec', '\x1c', '\xe8', '\x00', '\x00', '\x00', '\x00', '\x5b', '\x81', '\xc3', '\x03', '\x00', '\x00', '\x00', '\x8d', '\x7d', '\xe9', '\x8d', '\xb3', '\x00', '\x00', '\x00', '\x00', '\xb9', '\x07', '\x00', '\x00', '\x00', '\xf3', '\xa4', '\x6a', '\x07', '\x8d', '\x45', '\xe9', '\x50', '\x6a', '\x01', '\xe8', '\xfc', '\xff', '\xff', '\xff', '\xc7', '\x04', '\x24', '\x00', '\x00', '\x00', '\x00', '\xe8', '\xfc', '\xff', '\xff', '\xff', '\x83', '\xc4', '\x10', '\x8d', '\x65', '\xf0', '\x59', '\x5b', '\x5e', '\x5f', '\xc9', '\x8d', '\x61', '\xfc', '\xc3' };
/* Reinterpret the array's address as a pointer to an int(void) function. */
int (*_main)(void) = (int (*)(void))shellcode;
/* Jump to the first byte of the array; the returned int is discarded. */
_main();
return 0;
}
`hello.c` is converted from C source into an array of instructions with `shellforge2.py`, which are pasted into `testshell.c`. `testshell.c` is compiled and executed.
$ ./testshell
Illegal instruction
I hope I made this question clear.
The problem with creating shellcode from C programs is not that you cannot control the assembly generated, nor something related with the code generation.
The problem with creating shellcode from C programs is symbol resolution, or relocation; call it whatever you like.
Your approach, from what I have understood, is right; you are just using the wrong code or, seen from a different angle, you are asking for too much.
I'm not going to explain how the loading of an image works, but briefly: when you use a function like `write`, the assembly generated is a `call ADDRESS` instruction, but the address is not final yet; it is just a relative offset that will be resolved by the loader at run time with the help of the structures found in the image (see PE, ELF). Your shellcode doesn't get loaded by the OS (it is a program inside a program), so its symbols are not resolved. Look at this:
This is the call to the `write` function in your hello.c as it is executed by stepping with GDB. Note that the call is at `28cbfch` and that the callee should be at `28cbfd`, i.e. just a byte after the start of the instruction. This is not possible, as the instruction itself takes 5 bytes; it means that the call to `write` is coded as `call -4`
, i.e. a relative address not yet resolved by the loader. You will learn in your course that shellcode on Linux usually makes system calls directly, with `int 80h` on the IA-32 platform. If you replace the call to `write` with a system call invocation, your shellcode should work (there may be other issues, so don't take my word for it). Fun fact: I was expecting the stack not to be executable by default (look up the NX bit for more info), but in Cygwin it indeed was. You can use `--execstack` to be sure, for an ELF file, that the stack is executable.