Is memcpy a really function with symbol?

338 views Asked by At

This simple c:

#include <stdio.h>
#include <string.h>
int *add(int a, int b){
    int ar[1];
    int result = a+b;
    memcpy(ar, &result, sizeof(int));
    return ar;
}

int main(){
    int a = add(1,2)[0];
    printf("%i\n",a);
}

is compiled into this:

.text
    .globl  add
    .type   add, @function
add:
    pushq   %rbp    #
    movq    %rsp, %rbp  #,
    movl    %edi, -20(%rbp) # a, a
    movl    %esi, -24(%rbp) # b, b
# a.c:5:    int result = a+b;
    movl    -20(%rbp), %edx # a, tmp91
    movl    -24(%rbp), %eax # b, tmp92
    addl    %edx, %eax  # tmp91, _1
# a.c:5:    int result = a+b;
    movl    %eax, -8(%rbp)  # _1, result
# a.c:6:    memcpy(ar, &result, sizeof(int)); ---I SEE NO CALL INSTRUCTION---
    movl    -8(%rbp), %eax  # MEM[(char * {ref-all})&result], _6
    movl    %eax, -4(%rbp)  # _6, MEM[(char * {ref-all})&ar]
# a.c:7:    return ar;
    movl    $0, %eax    #--THE FUNCTION SHOULD RETURN ADDRESS OF ARRAY, NOT 0. OTHERWISE command terminated
#   lea -4(%rbp), %rax  #--ONLY THIS IS CORRECT, NOT `0`
# a.c:8: }
    popq    %rbp    #
    ret 
    .size   add, .-add
    .section    .rodata
.LC0:
    .string "%i\n"
    .text
    .globl  main
    .type   main, @function
main:
    pushq   %rbp    #
    movq    %rsp, %rbp  #,
    subq    $16, %rsp   #,
# a.c:11:   int a = add(1,2)[0];
    movl    $2, %esi    #,
    movl    $1, %edi    #,
    call    add #
# a.c:11:   int a = add(1,2)[0];
    movl    (%rax), %eax    # *_1, tmp90
    movl    %eax, -4(%rbp)  # tmp90, a
# a.c:12:   printf("%i\n",a);
    movl    -4(%rbp), %eax  # a, tmp91
    movl    %eax, %esi  # tmp91,
    leaq    .LC0(%rip), %rdi    #,
    movl    $0, %eax    #,
    call    printf@PLT  #
    movl    $0, %eax    #, _6
# a.c:13: }
    leave   
    ret 
    .size   main, .-main
    .ident  "GCC: (Debian 8.3.0-6) 8.3.0"
    .section    .note.GNU-stack,"",@progbits

Every function from stdlib like printf or puts are called from GOT (i.e. %rip register holds the address of GOT). But not memcpy, it is like "assembly inline instructions" instead of regular call address. So is memcpy even a symbol? If so, why is it not as argument to call? Is memcpy in GOT table? If so, what is a offset from GOT to that symbol?

1

There are 1 answers

3
zwol On BEST ANSWER

So first off, you have a bug:

$ cc -O2 -S test.c
test.c: In function ‘add’:
test.c:7:12: warning: function returns address of local variable

Returning the address of a local variable has undefined behavior, if and only if the caller uses that value; this is why your compiler generated code that returned a null pointer, which will crash the program if used but be harmless otherwise. In fact, my copy of GCC generates only this for add:

add:
        xorl    %eax, %eax
        ret

because that treatment of the return value makes the other operations in add be dead code.

(The "only if used" restriction is also why my compiler generates a warning, not a hard error.)

Now, if I modify your program to have well-defined behavior, e.g.

#include <stdio.h>
#include <string.h>

void add(int *sum, int a, int b)
{
    int result = a+b;
    memcpy(sum, &result, sizeof(int));
}

int main(void)
{
    int a;
    add(&a, 1, 2);
    printf("%i\n",a);
    return 0;
}

then I do indeed see assembly code in which the memcpy call has been replaced by inline code:

add:
    addl    %edx, %esi
    movl    %esi, (%rdi)
    ret

This is a feature of many modern C compilers: they know what some of the C library's functions do, and can inline them when that makes sense. (You can see that in this case the generated code is both smaller and faster than it would have been with an actual call to memcpy.)

GCC lets me turn this feature off with a command-line option:

$ gcc -O2 -ffreestanding test.c
$ sed -ne '/^add:/,/cfi_endproc/{; /^\.LF[BE]/d; /\.cfi_/d; p; }' test.s
add:
    subq    $24, %rsp
    addl    %edx, %esi
    movl    $4, %edx
    movl    %esi, 12(%rsp)
    leaq    12(%rsp), %rsi
    call    memcpy@PLT
    addq    $24, %rsp
    ret

In this mode, the call to memcpy in add is treated the same as the call to printf in main. Your compiler may have similar options.